---
title: "StatsCan Data"
author: "Isadora Borges Monroy"
date: "04/12/2024"
output: html_document
editor_options: 
  chunk_output_type: console
---

GSS34
```{r setup, include = FALSE}
# Import data
.libPaths("//mcg-main/equipes/Projet 7107/_Rpackages")
GSS34 <- haven::read_dta("S:/GSS_ESG_34/GSS_ESG_34_v4/GSS_ESG_34_v4/data_donnees/stata/gss_esg_34_analm_f1_v3.dta")
GSS35 <- haven::read_dta("S:/GSS_ESG_35/GSS_ESG_35_v2/data_donnees/stata/gss_esg_35_analm_f3_v2.dta")
ICC <- haven::read_dta("S:/ICC_RCC_ED_2020/ICC_RCC_ED_2020_v1/data_donnees/stata/icc_ed_2020_f1_v1.dta")
library(magrittr)
library(janitor) 
library(stargazer)
library(tidyverse)
options(scipen = 99)
setwd("//mcg-main/equipes/Projet 7107/Isadora/")

# Check
head(GSS34)
glimpse(GSS34)

#Eventually
#GSS34imputed <- read_rds("GSS_V_2019_imputed.R")
# 
theme <- theme_bw() +
 theme(plot.title = element_text(hjust = 0.5),
       aspect.ratio = 1,
       legend.position="top", #legend.position="right",
       panel.background = element_rect(fill = "white"),
       #plot.margin = margin (0.1,0.1,0.1,0.1, "cm"),
       strip.background = element_blank(),
       panel.grid.minor=element_blank(),
       panel.grid.major= element_line(color = "seashell1"),
       plot.background=element_blank(),
       title = element_text(size = 26),
#    axis.text.x = element_blank(), #remove tickmarks
    axis.title.x = element_text(size = 20, colour = "black"),
    axis.title.y = element_text(size = 20, colour = "black"),
    legend.title = element_text(size = 18, colour = "black"),
    legend.text = element_text(size = 16, colour = "black"),
axis.text = element_text(size = 16)
)
```

#Recode Variables

## DVs

```{r DV standardized confidence}
##GSS34
# Examine confidence in police CIP_10
# Note it is coded from 1 to 4 where 1 is "A great deal of confidence" and 4 is "No confidence at all"
str(GSS34$CIP_10) # Numeric
table(GSS34$CIP_10)

# Reverse coding such that higher values indicates higher confidence
GSS34 %<>% 
  mutate(CIP_10 = case_when(
    CIP_10 == 1 ~ 4,
    CIP_10 == 2 ~ 3,
    CIP_10 == 3 ~ 2,
    CIP_10 == 4 ~ 1,
    TRUE ~ NA_real_
  ))

# Check
table(GSS34$CIP_10)

# Examine confidence in criminal courts PCC_145 (why?)
table(GSS34$PCC_145)

# Reverse coding such that higher values indicates higher confidence
GSS34 %<>% 
  mutate(PCC_145 = case_when(
    PCC_145 == 1 ~ 4,
    PCC_145 == 2 ~ 3,
    PCC_145 == 3 ~ 2,
    PCC_145 == 4 ~ 1,
    TRUE ~ NA_real_
  ))

# Check
table(GSS34$PCC_145)

# Created standardized police confidence variable
GSS34 %<>%
  mutate(`standardized police confidence` = scales::rescale(CIP_10 - (select(., c(CIP_10, PCC_145))) %>% rowMeans(na.rm = TRUE)))

# Check
summary(GSS34$`standardized police confidence`)
summary(GSS34$`raw police confidence`)
GSS34%<>%rename(`raw police confidence`=CIP_10)

##ICC
# Examine trust in police variable
str(ICC$tii_05a) # numeric
table(ICC$tii_05a)

# Note: It is coded from 1 to 5 where 1 means "no trust at all" and 5 means " a great deal of trust"

# Recode "Not stated" category for all trust variables
ICC %<>% 
  mutate(tii_05a = ifelse(tii_05a == 9, NA, tii_05a),
         tii_05b = ifelse(tii_05b == 9, NA, tii_05b),
         tii_05c = ifelse(tii_05c == 9, NA, tii_05c),
         tii_05d = ifelse(tii_05d == 9, NA, tii_05d),
         tii_05e = ifelse(tii_05e == 9, NA, tii_05e),
         tii_05f = ifelse(tii_05f == 9, NA, tii_05f),
         tii_05g = ifelse(tii_05g == 9, NA, tii_05g),
         tii_05h = ifelse(tii_05h == 9, NA, tii_05h),
         tii_05i = ifelse(tii_05i == 9, NA, tii_05i),
         tii_05j = ifelse(tii_05j == 9, NA, tii_05j),
         tii_05k = ifelse(tii_05k == 9, NA, tii_05k),
         tii_05l = ifelse(tii_05l == 9, NA, tii_05l),
         tii_05m = ifelse(tii_05m == 9, NA, tii_05m))


apply(ICC[c(65:77)], MARGIN = 2, FUN = table)

# Create standardized police confidence variable
ICC %<>%
  mutate(`standardized police confidence` = scales::rescale(tii_05a - (select(., starts_with("tii_")) %>% rowMeans(na.rm = TRUE))))

summary(ICC$`standardized police confidence`)
ICC%<>%rename(`raw police confidence`=tii_05a)
summary(ICC$`raw police confidence`)

##GSS35
# Examine trust in police variable
str(GSS35$CII_10)
table(GSS35$CII_10)

# Note that they are coded such that 1 = no confidence and 5 = a great deal of confidence and 9 = Not stated

# Recode "Not stated" category as NA
GSS35 %<>%  
  mutate(CII_10 = ifelse(CII_10 == 9, NA, CII_10), # Police
         CII_15 = ifelse(CII_15 == 9, NA, CII_15), # Justice system and courts
         CII_30 = ifelse(CII_30 == 9, NA, CII_30), # School system
         CII_40 = ifelse(CII_40 == 9, NA, CII_40), # Federal Parliament
         CII_45 = ifelse(CII_45 == 9, NA, CII_45), # Banks
         CII_50 = ifelse(CII_50 == 9, NA, CII_50), # Major corporations
         CII_55 = ifelse(CII_55 == 9, NA, CII_55), # Local merchants and business people
         CII_60 = ifelse(CII_60 == 9, NA, CII_60)) # Canadian media

# Check
GSS35 %>% select(starts_with("CII_")) %>% map(~tabyl(.) %>% adorn_pct_formatting(digits = 2))

# Create standardized police variable
GSS35 %<>%
  mutate(`standardized police confidence`= 
                   scales::rescale(CII_10 - (select(., starts_with("CII_")) %>% rowMeans(na.rm = TRUE))))

# Check
summary(GSS35$`standardized police confidence`)
GSS35%<>%rename(`raw police confidence`=CII_10)
summary(GSS35$`raw police confidence`)

```

#IVs

```{r Race recode}
##GSS34
# Explore race variables
str(GSS34$vismin) # numeric
table(GSS34$vismin)

str(GSS34$ABM_01A) # numeric
table(GSS34$ABM_01A)

# Replace numeric code with appropriate category
GSS34 %<>%
  mutate(Ethnicity = case_when(
    ABM_01A == 2 ~ "Indigenous",
    vismin == 3 ~ "Black",
    vismin == 13 & ABM_01A == 1 ~ "White", 
    vismin == 1 | vismin == 2 | vismin == 4 | vismin == 5 | vismin == 6 | vismin == 7 | vismin == 8 | vismin == 9 | vismin == 10 | vismin == 11 | vismin == 12 ~ "Other Person of Colour",
    TRUE ~ NA_character_
  ),
  Ethnicity = factor(Ethnicity, levels = c("White", "Black", "Indigenous", "Other Person of Colour"))) 

# Check recoding
levels(GSS34$Ethnicity)
summary(GSS34$Ethnicity)
str(GSS34$Ethnicity)

# Create another ethnicity variable
GSS34$Ethnicity2 <- GSS34$Ethnicity

# Lump all PoC into one category 
GSS34$Ethnicity2 %<>% 
  fct_lump_n(., n = 1) %>% 
  recode("Other" = "PoC")

# Check
summary(GSS34$Ethnicity)
summary(GSS34$Ethnicity2)

##ICC
# Explore race variables
str(ICC$vismin) # numeric
table(ICC$vismin)

str(ICC$iident) # numeric
table(ICC$iident)

str(ICC$iidflag) # numeric
table(ICC$iidflag)

# Replace numeric code with appropriate category
ICC %<>%
  mutate(Ethnicity = case_when(
    iidflag == 1 ~ "Indigenous",
    vismin == 3 ~ "Black",
    vismin == 13 & iidflag == 2 ~ "White",
    vismin == 1 | vismin == 2 | vismin == 4 | vismin == 5 | vismin == 6 | vismin == 7 | vismin == 8 | vismin == 9 | vismin == 10 | vismin == 11 | vismin == 12 ~ "Other Person of Colour",
    TRUE ~ NA_character_
  ),
  Ethnicity = factor(Ethnicity, levels = c("White", "Black", "Indigenous", "Other Person of Colour"))) 

# Check recoding
levels(ICC$Ethnicity)
summary(ICC$Ethnicity)
str(ICC$Ethnicity)

# Create another ethnicity variable
ICC$Ethnicity2 <- ICC$Ethnicity

# Lump all PoC into one category 
ICC$Ethnicity2 %<>% 
  fct_lump_n(., n = 1) %>% 
  recode("Other" = "PoC")

# Check
summary(ICC$Ethnicity)
summary(ICC$Ethnicity2)

##GSS35
# Explore race variables
str(GSS35$vismin) # numeric
table(GSS35$vismin)

str(GSS35$ABM_01A) # numeric
table(GSS35$ABM_01A)

# Replace numeric code with appropriate category
GSS35 %<>%
  mutate(Ethnicity = case_when(
    ABM_01A == 2 ~ "Indigenous",
    vismin == 3 ~ "Black",
    vismin == 13 & ABM_01A == 1 ~ "White", 
    vismin == 1 | vismin == 2 | vismin == 4 | vismin == 5 | vismin == 6 | vismin == 7 | vismin == 8 | vismin == 9 | vismin == 10 | vismin == 11 | vismin == 12 ~ "Other Person of Colour",
    TRUE ~ NA_character_
  ),
  Ethnicity = factor(Ethnicity, levels = c("White", "Black", "Indigenous", "Other Person of Colour"))) 

# Check recoding
tabyl(GSS35$Ethnicity) %>% adorn_pct_formatting(digits = 2)

# Create another ethnicity variable
GSS35$Ethnicity2 <- GSS35$Ethnicity

# Lump all PoC into one category 
GSS35$Ethnicity2 %<>% 
  fct_lump_n(., n = 1) %>% 
  recode("Other" = "PoC")

# Check
levels(GSS35$Ethnicity2)
tabyl(GSS35$Ethnicity2)
```

```{r crosstabs race an confidence}

GSS34 %>% tabyl(Ethnicity, `raw police confidence`) %>% adorn_percentages("row") %>%  adorn_pct_formatting(digits = 2)
ICC %>% tabyl(Ethnicity, `raw police confidence`)%>% adorn_percentages("row") %>%  adorn_pct_formatting(digits = 2)
GSS35 %>% tabyl(Ethnicity, `raw police confidence`)%>% adorn_percentages("row") %>%  adorn_pct_formatting(digits = 2)

GSS34$Survey <- "GSS34"
GSS35$Survey <- "GSS35"
ICC$Survey <- "ICC"

mini <- bind_rows(GSS34 %>% select(Ethnicity, Ethnicity2, `raw police confidence`, `standardized police confidence`, Survey), 
                  GSS35 %>% select(Ethnicity, Ethnicity2, `raw police confidence`, `standardized police confidence`, Survey), ICC %>% select(Ethnicity, Ethnicity2, `raw police confidence`, `standardized police confidence`, Survey))

mini$Survey%<>%fct_relevel("GSS34", "ICC", "GSS35")
write_rds(GSS34, "GSS34.R")
write_rds(GSS35, "GSS35.R")
write_rds(ICC, "ICC.R")
write_rds(mini, "mini.R")

summary(mini$`raw police confidence`)

mini %>% group_by(`Ethnicity`, Survey) %>% 
  group_modify(~ mean_se(.x$`raw police confidence`, mult = 1.96)) %>% 
  #mutate(Ethnicity = `Group`) %>% 
  ggplot(aes(x = Ethnicity, y = y, color = Survey, fill = Survey)) +
  geom_hline(yintercept = 2.5, linewidth = 1, color = "red", alpha = .5)+
  geom_point(position = position_dodge(0.9), size=2, shape=21)+
  geom_smooth(method = lm, se = TRUE)+
  geom_errorbar(aes(ymin = ymin, ymax = ymax ), width = 0.25, position = position_dodge(0.9)) +
  scale_color_manual(values = c("darkgreen", "blue", "#CF8729"))+
  scale_fill_manual(values = c("darkgreen", "blue", "#CF8729"))+
  guides(fill = "none")+
  scale_y_continuous("Score: 
                     \nlow      \u2194      high ", limits = c(1,5))+
  # facet_wrap(`Fourth variable`~.)+
  ggtitle("Grouped confidence in police by surveys", subtitle = "unstandardized 1-5 scores")+
  theme()

summary(mini$`standardized police confidence`)
mini %>% group_by(`Ethnicity`, Survey) %>% 
  group_modify(~ mean_se(.x$`standardized police confidence`, mult = 1.96)) %>% 
  #mutate(Ethnicity = `Group`) %>% 
  ggplot(aes(x = Ethnicity, y = y, color = Survey, fill = Survey)) +
  geom_hline(yintercept = 0.5, linewidth = 1, color = "red", alpha = .5)+
  geom_point(position = position_dodge(0.9), size=2, shape=21)+
  geom_smooth(method = lm, se = TRUE)+
  geom_errorbar(aes(ymin = ymin, ymax = ymax ), width = 0.25, position = position_dodge(0.9)) +
  scale_color_manual(values = c("darkgreen", "blue", "#CF8729"))+
  scale_fill_manual(values = c("darkgreen", "blue", "#CF8729"))+
  guides(fill = "none")+
  scale_y_continuous("Score: 
                     \nlow      \u2194      high ", limits = c(0.45,.65))+
  # facet_wrap(`Fourth variable`~.)+
  ggtitle("Grouped confidence in police by surveys", subtitle = "standardized 0-1 scores")+
  theme()
```


```{r Poverty}
# Examine inc variable
str(GSS34$inc)
summary(GSS34$inc)

# Create income variable
GSS34 %<>%  
  mutate(Income = case_when(
   inc %in% 0:20001 ~ "Below poverty line",
   inc > 20000 & inc != 999999996 ~ "Above poverty", # Recode "Valid skip" category as NA
   TRUE ~ NA_character_))

str(GSS34$Income)

GSS34imputed2019$Income %<>% fct_infreq()

# Check
GSS34%$%summary(Income)
```

```{r Immigration}
# Examine
table(GSS34$brthcan)

# Rename immigration status variable
GSS34 %<>% rename("Place of birth" = brthcan) 

# Recode categories
GSS34$`Place of birth` %<>% recode("1" = "Canada", 
                                "2" = "Other", 
                                "9" = NA_character_) %>% factor()

# Check
summary(GSS34$`Place of birth`)

# Create Years in Canada variable
GSS34 %<>% 
  mutate(`Years in Canada` = case_when(
    `Place of birth` == 1 ~ NA_real_, # Born in Canada 
    BPR_17 == 9996 ~ NA_real_, # Skipped question
    BPR_17 == 9999 ~ NA_real_, # Not stated
    TRUE ~ 2019 - BPR_17
  ))

# Check
GSS34 %>% select(BPR_17, `Years in Canada`)
```

```{r Gender}
# Examine
str(GSS34$gender)
table(GSS34$gender)

# Rename variable
GSS34 %<>% rename(Gender = gender)

# Recode Gender variable
GSS34 %<>% 
  mutate(Gender = case_when(
    Gender == 1 ~ "Male",
    Gender == 2 | Gender == 3 ~ "Women trans and non-binary",
    TRUE ~ NA_character_
  ),
  Gender = factor(Gender))

# Check
summary(GSS34$Gender)
```

## Controls

```{r Age and Cohorts}
# Rename age variable
GSS34 %<>% rename(Age = age)

# Create Cohort variable
GSS34 %<>% 
 mutate(Cohort = case_when(Age %in% c(18:23) ~ "GenZ and Millennial", 
                           Age %in% c(24:39) ~ "GenZ and Millennial", 
                           Age %in% c(40:55) ~ "Gen X", 
                           Age %in% c(56:74) ~ "Ok Boomer", 
                           Age > 74 ~ "Silent", 
                           TRUE ~ NA_character_),
        Cohort = factor(Cohort, levels = c("GenZ and Millennial", "Gen X", "Ok Boomer", "Silent")))

# Check
GSS34%$%summary(Cohort)
levels(GSS34$Cohort)

```

```{r Education}
# Examine variable
table(GSS34$EHG1_01)

# Create Education variable
GSS34 %<>% mutate( 
    Education = case_when(
      EHG1_01 %in% c(1:2) ~ "Low", # Up to (including) Completed secondary/ high school
      EHG1_01 %in%  c(3:5) ~ "Medium", # Trades, technical, college, university certificate/diploma below bachelor's
      EHG1_01 %in%  c(6:7) ~ "High", # Bachelor's degree and higher
      TRUE ~ NA_character_) %>%  
      fct_relevel(., "Low", "Medium", "High"))

# Check
GSS34%$%summary(Education)

```


```{r Language}
# Examine
table(GSS34$lanint)

# Recode 
GSS34 %<>% 
  rename(Language = lanint) %>% 
  mutate(Language = ifelse(Language == "1", "English", "French"))

# Check
table(GSS34$Language)
```


```{r Province}
# Examine
table(GSS34$prv)

# Recode categories
GSS34 %<>% 
  mutate(prv = case_when(
         prv == 10 ~ "Newfoundland and Labrador",
         prv == 11 ~ "Prince Edward Island",
         prv == 12 ~ "Nova Scotia",
         prv == 13 ~ "New Brunswick",
         prv == 24 ~ "Quebec",
         prv == 35 ~ "Ontario",
         prv == 46 ~ "Manitoba",
         prv == 47 ~ "Saskatchewan",
         prv == 48 ~ "Alberta", 
         prv == 59 ~ "British Columbia",
         prv == 60 ~ "Yukon",
         prv == 61 ~ "Northwest Territories",
         prv == 62 ~ "Nunavut"))

# Rename variable
GSS34 %<>% rename(Province = prv)

# Reorder categories
GSS34$Province %<>% fct_relevel(.,"Ontario")

# Check
levels(GSS34$Province)
table(GSS34$Province)

# Create Regions variable
GSS34$Regions <- GSS34$Province

# Grouping in appropriate regions
GSS34$Regions %<>% 
  fct_collapse(., "Atlantic" = c("Newfoundland and Labrador", "Prince Edward Island", "Nova Scotia", "New Brunswick"), 
                  "Prairie" = c("Manitoba","Saskatchewan","Alberta"),
                  "North" = c("Yukon", "Northwest Territories", "Nunavut"))

# Check
table(GSS34$Regions)
```


```{r Final prep}
# Create dataframe with variables of interest
GSS34 %<>% 
  select(., Ethnicity, Ethnicity2, Income, `Place of birth`, Gender, Age, Cohort, Education, Language, Province, Regions, `standardized police confidence`, WGHT_PER, recid)

# Change character variables to factor
GSS34 %<>% mutate_if(is.character, factor)

# Create dataset
write_rds(GSS34, "GSS_V_2019.R") 
```

#### Imputation 
```{r missingness}
naniar::miss_var_summary(GSS34)  %>% mutate_if(is.numeric, round,2) %>% gt::gt()
```


```{r Imputation process}
# Import dataset
GSS34 <- readRDS("GSS_V_2019.R")

# Load package
library(mice)

# Rename variables
GSS34 %<>% 
  rename(PoB= `Place of birth`, stdPoliceConfidence =`standardized police confidence`)

# Check
colnames(GSS34)

# Imputation
imp <- mice(GSS34, maxit = 0) 
meth <- imp$method
predM <- imp$predictorMatrix

# Specify which variables should NOT be used for PREDICTION (outcomes, survey weights)
# Variables that should NOT be IMPUTED (outcomes, main IV, survey weights)
predM[, c("stdPoliceConfidence", "Language", "Province", "Regions", "WGHT_PER", "recid")] = 0 # Imputed on Gender

# Note that diff methods must be used to impute continuous, binary, and ordinal vars
# Specify which methods for the diff variables

# None for the ones that shouldn't be imputed
meth[c("stdPoliceConfidence", "Language", "Province", "Regions", "WGHT_PER", "recid")]="" # Imputed on Gender

# Numeric variables
meth[c("Age")]="pmm" # numeric
meth[c( "Ethnicity2", "Income", "PoB")]="logreg" # dummy
meth[c("Ethnicity", "Gender")]="polyreg" # Unordered categorical factor / Imputed on Gender as well since it had NA values which caused error when creating GSS34imputed.rake  
meth[c("Cohort", "Education")] = "polr" # Ordered categorical factor

#### Run the multiple (m=5) imputation
set.seed(1986)

# Select and run the next three lines together to see how quickly the impute
lubridate::now() 
imputed  <-  mice(GSS34, method=meth, predictorMatrix=predM, m=5) 
lubridate::now() 

# Start: 2022-02-16 11:08:09 PST
# End: 2022-02-16 11:10:23 PST
# Total: 2 minutes

# Create dataframe after imputation
GSS34imputed <- mice::complete(imputed) 

# Check for missingness
test <- GSS34imputed %>% map_GSS34(~(sum(is.na(.x))) #/length(x) #as proportion of length
                   ) %>% as_tibble() %>% transpose(.) %>% simplify_all()

names(test) <- "Imputed Variables"
str(GSS34imputed)

# Rename back to tidyverse convention
GSS34imputed %<>% 
  rename(`Place of birth`= PoB, `standardized police confidence`=stdPoliceConfidence)

write_rds(GSS34imputed, "GSS_V_2019_imputed.R")
```

# Weighing data

```{r Reweighting}
# Import dataset
GSS34imputed <- read_rds("GSS_V_2019_imputed.R")
GSS34imputed <- readRDS("GSS_V_2019_imputed.R")


# Computing survey weights (CES has: province, gender, age group, and education; we will add: Ethnicity2)
# https://www.r-bloggers.com/survey-computing-your-own-post-stratification-weights-in-r/

# Load package
library(survey)

# Create the unweighted survey object
GSS34imputed.unweighted <- survey::svydesign(ids = ~1, data = GSS34imputed) # ids = ~1 means we don't have any weights. R gives us a warning saying we're not applying any weights

# Create the population data frames
# Provinces
table(GSS34imputed$Regions)

# Create provinces data set
provinces <- read_csv("T:/Projet 7107/Isadora/census_2016_population.CSV")

# Separate into different regions
provinces %<>%
  select(`Geographic name`, `Population, 2016`) %>% 
  drop_na(`Population, 2016`) %>% 
  mutate(Regions = `Geographic name`)

provinces$Regions %<>% 
  fct_collapse(., "Atlantic" = c("Newfoundland and Labrador", "Prince Edward Island", "Nova Scotia", "New Brunswick"), 
                  "Prairie" = c("Manitoba","Saskatchewan","Alberta") ,
                  "North" = c("Northwest Territories", "Yukon", "Nunavut"))

# Check
head(provinces)

#POPULATION DATA FOR WEIGHTS COMES FROM STATSCAN CENSUS PUBLICL AVAILABLE DATA
# Remove Canada category Note: Population = 35151728 #POPULATION DATA FOR WEIGHTS COMES FROM STATSCAN CENSUS PUBLICL AVAILABLE DATA
provinces %<>% 
  select (-`Geographic name`) %>%
  filter(Regions != "Canada") %>% 
  group_by(Regions) %>%
  summarise(sum = sum(`Population, 2016`),
            prop = sum(`Population, 2016`)/35151728, #POPULATION DATA AND POPORTIONS FOR WEIGHTS COMES FROM STATSCAN CENSUS PUBLICL AVAILABLE DATA
            Freq = nrow(GSS34imputed) * prop)

# Check
head(provinces)

# Region 
region.dist <- data.frame(provinces %>% 
                            select(Regions, Freq) %>% 
                            mutate(Regions = as.character(Regions)))

# Gender
gender.dist <- data.frame(Gender = c("Women trans and non-binary", "Male"),
                          Freq = nrow(GSS34imputed) * c(0.514240946, 0.485759054))


# Cohort
cohort.dist <- data.frame(Cohort = c("GenZ and Millennial", "Gen X", "Ok Boomer", "Silent"),
                          Freq = nrow(GSS34imputed) * c(0.354978037, 
                                                     0.278681465, 
                                                     0.27593917, 
                                                     0.090401327))

# Education
edu.dist <- data.frame(Education = c("Low", "Medium", "High"),
                       Freq = nrow(GSS34imputed) * c(0.352, 0.363, 0.285))

# Race
race.dist <- data.frame(Ethnicity2 = c("White","PoC"),
                       Freq = nrow(GSS34imputed) * c(0.728719026, 0.271280974)) # The category frequencies were flipped

# Compute the weights by putting together the marginal distributions from before with our data
GSS34imputed.rake <- rake(design = GSS34imputed.unweighted,
                       sample.margins = list(~Regions, ~Gender, ~Cohort, ~Education, ~Ethnicity2),
                       population.margins = list(region.dist, gender.dist, cohort.dist, edu.dist, race.dist))

# Trim the weights
summary(weights(GSS34imputed.rake)) # Some of the weights are much too large & too small

GSS34imputed.rake.trimmed <- trimWeights(GSS34imputed.rake, lower = 0.3, upper = 3, strict=TRUE) 
summary(weights(GSS34imputed.rake.trimmed)) # Now all the weights range between 0.3 and 3, which is more sensible
str(GSS34imputed.rake.trimmed)

# Compare the data before and after 
prop.table(svytable(~Ethnicity2, GSS34imputed.unweighted))
prop.table(svytable(~Ethnicity2, GSS34imputed.rake.trimmed))

# Comparing svy mean and total in raked data
svymean(~Ethnicity2, GSS34imputed.rake.trimmed)
svytotal(~Ethnicity2, GSS34imputed.rake.trimmed)

svymean(~Ethnicity2, GSS34imputed.unweighted) # and in unweighted data
svytotal(~Ethnicity2, GSS34imputed.unweighted)

# Creating weights object in original GSS34imputed dataframe so we can use in our regressions:
GSS34imputed$WeightsRace <- weights(GSS34imputed.rake.trimmed)

# Create data set
write_rds(GSS34imputed, "GSS_V_2019_imputed.R")

```

## Switching between renames

```{r Quick Renames block for ggeffect and imputation}
# Rename
GSS34 %<>%
  rename(`Place of birth`= PoB, `standardized police confidence`= stdPoliceConfidence)
GSS34imputed %<>%
  rename(PoB=`Place of birth`, stdPoliceConfidence = `standardized police confidence`)

# Check
GSS34imputed$stdPoliceConfidence
```

# Analysis with finalized cleaned weighted and imputed data

```{r Summaries, include=FALSE}
# Import imputed data set
GSS34imputed <- read_rds("GSS_V_2019_imputed.R")

getMode <- function(GSS34) {
  ux <- na.omit(unique(GSS34))
  ux[which.max(tabulate(match(GSS34, ux)))]
}

GSS34 <- GSS34imputed %>% select(-recid, -WGHT_PER, -WeightsRace)
Mode <- apply(GSS34%>% select(where(is.factor)), 2, getMode)

p <- GSS34imputed %>% select_if(is.factor) %>% map_GSS34(~(tabyl(.) %>% adorn_totals("row") %>% adorn_pct_formatting()))
View(p)
p %>% flextable() %>% 
  save_as_docx(path = "univariates2 GSS2019.docx")
GSS34imputed2019 %>% select_if(is.factor) %>% colnames()


# Error running this code
# GSS34imputed %>% select_if(is.factor)%>% map(~(sd(.)))

# Create dummy variables
dummies::dummy(GSS34imputed$Cohort) %>%  psych::describe(., fast = TRUE)
dummies::dummy(GSS34imputed$Regions) %>% psych::describe(., fast = TRUE)
dummies::dummy(GSS34imputed$Education) %>% psych::describe(., fast = TRUE)


descriptives <- GSS34 %>% 
  summarise_all(.funs = list(
    `Mean or mode` = function(x)ifelse(is.numeric(x), sprintf("%.3f", mean(x, na.rm = TRUE)), as.character(getMode(x))), 
    SD = function(x)ifelse(is.numeric(x), round(sd(x, na.rm = TRUE), 2), NA_real_), 
    `Min or first level` = function(x)ifelse(is.numeric(x), sprintf("%.0f", min(x, na.rm = TRUE)), levels(x)[1]), 
    `Max or last level` = function(x)ifelse(is.numeric(x), sprintf("%.0f", max(x, na.rm = TRUE)), levels(x)[length(levels(x))]), 
    n = function(x)sum(!is.na(x))
  )) %>% 
  pivot_longer(everything(),
        names_to = c("Variable", ".value"),
        names_pattern = "(.+)_(.+)")

descriptives %>% flextable() %>% 
  save_as_docx(path = "descriptives.docx")

descriptives %>% gt::gt(rownames_to_stub = TRUE) %>%
  tab_header(title = "Descriptive statistics", subtitle = "GSS 2019") %>% 
  tab_source_note("SD NA for categorical variables") #%>% gtsave("descriptives GSS V 2019.tex")
```



```{r Saving output}
# One
GSS34 %>% 
  tabyl(`var`) %>% 
  gt() %>% 
  gtsave(filename = "var.tex")

GSS34 %>% 
  tabyl(var) %>% 
  flextable() %>% 
  save_as_docx(path = "var.docx")

# Two/three way
GSS34 %>% 
  tabyl(var1, var2) %>% 
  gt() %>% 
  gtsave(filename = "vars.tex")

GSS34 %>% 
  tabyl(var1, var2) %>%  
  flextable() %>% 
  save_as_docx(path = "vars.docx")

# Print one-way table for every (non-numeric) variable 
GSS34imputed %>%  
   select(!where(is.numeric)) %>% # Or don't select anything for all, or is.numeric for numeric but that's usually overkill, unless you really think there is a weird distribution we should see. 
   imap(~ {variable <- .y
   tabyl(.) %>% 
     rename_with(~ variable, 1) %>% 
     adorn_pct_formatting(.) 
   #%>%  gt(.) %>% gtsave(str_c(variable, ".tex")) OR     save_as_docx(path = str_c(variable, ".docx"))
    })

GSS34imputed %>%  
   select(!where(is.numeric)) %>% # Or don't select anything for all, or is.numeric for numeric but that's usually overkill, unless you really think there is a weird distribution we should see. 
   imap(~ {variable <- .y
   levels(.)  })
   
```


Note: Figures are saved by running cairo_pGSS34 first, then the ggplot code, then dev.off. Run without cairo/dev.off first to make sure everything runs well, that the titles make sense, etc and flag any issue. No need to keep saving wrong plots. When you are sure they are good to save, run cairo first then turn off saving with dev.off.  

```{r Fig 1 Grouped means differences, echo=FALSE}
# Load library
library(survey)
GSS34imputed <- read_rds("GSS_V_2019_imputed.R")
GSS34imputed$BlackWhite %>% tabyl %>% adorn_pct_formatting()
GSS34imputed$IndigenousWhite %>% tabyl %>% adorn_pct_formatting()
GSS34imputed$oPoCWhite %>% tabyl %>% adorn_pct_formatting()
GSS34imputed %<>% mutate(BlackWhite = case_when(Ethnicity == "White" ~ "White",
                                             Ethnicity == "Black" ~ "Black", 
                                             TRUE ~ NA_character_) %>% fct_relevel("White", after = 0L))
GSS34imputed %<>% mutate(IndigenousWhite = case_when(Ethnicity == "White" ~ "White",
                                             Ethnicity == "Indigenous" ~ "Indigenous", 
                                             TRUE ~ NA_character_) %>% fct_relevel("White", after = 0L))
GSS34imputed %<>% mutate(oPoCWhite = case_when(Ethnicity == "White" ~ "White",
                                             Ethnicity == "Other Person of Colour"~ "Other visible minority", 
                                             TRUE ~ NA_character_) %>% fct_relevel("White", after = 0L))

GSS34imputed.weighted <- survey::svydesign(ids= ~WeightsRace, weights = ~WeightsRace, data = GSS34imputed) # ids=~to the Weights variable
#t1W <- svyttest(stdPoliceConfidence~Ethnicity2, design = GSS34imputed) GSS34 is wrong and so is DV name
GSS34imputed.weighted$variables %>% names()
t1W <- svyttest(`standardized police confidence`~Ethnicity2, design = GSS34imputed.weighted)
#t2W <- svyttest(`standardized police confidence`~Ethnicity, design = GSS34imputed.weighted)
t2B <- svyttest(`standardized police confidence`~BlackWhite, design = GSS34imputed.weighted)
t2I <- svyttest(`standardized police confidence`~IndigenousWhite, design = GSS34imputed.weighted)
t2V <- svyttest(`standardized police confidence`~oPoCWhite, design = GSS34imputed.weighted)
#t2W <- svyglm(`standardized police confidence`~Ethnicity, design = GSS34imputed.weighted)#This does not graph
t3W <- svyttest(`standardized police confidence`~Gender, design = GSS34imputed.weighted)
t4W <- svyttest(`standardized police confidence`~Income, design = GSS34imputed.weighted)
t5W <- svyttest(`standardized police confidence`~`Place of birth`, design = GSS34imputed.weighted)
# tW_list <- list(t1W, t2W, t3W, t4W, t5W)

tW_list <- list(t1W, t2B, t2I, t2V, t3W, t4W, t5W)

t_GSS34W <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tW_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_GSS34W <- rbind(t_GSS34W, vec)
}

t_GSS34W %<>% mutate(Variable = c("Grouped PoC (ref White)", "Black (ref White)", "Indigenous (ref White)", "other PoC (ref White)",
                               "Gender", "Income", "Place of birth"), 
                  Estimate = "GSS 2019")

names(t_GSS34W) = c("mean", "lower", "upper", "Variable", "Model")
summary(t2W)

cairo_pGSS34("Figure 1 GSS V 2019_releveled income.pGSS34", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 
t_GSS34W %>%
  ggplot(aes(x = Variable %>%fct_relevel("Income", "Gender", "Place of birth", "other PoC (ref White)","Indigenous (ref White)","Black (ref White)","Grouped PoC (ref White)"), y = mean, color =  Model)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police", limits = c(-.10, .05))+ #adjust limits accordingly
  scale_x_discrete(name = "", labels=c("Place of birth" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref Above)",
                            "Gender" = "Gender (ref male)",
                            #"Ethnicities" = "4-category\nEthnicities (ref White)",
                            "Binary Ethnicity" = "Person of colour\n(ref White)"))


dev.off() #If cairo_pGSS34 was created.
t_GSS34W %>%as_tibble() %>%  flextable() %>% 
  save_as_docx(path = "ttests weighed GSSV2019_binary race.docx")

dwplot(t2W)
summary(t2W) #output to excel

GSS34imputed %$% levels(Income)


#Single item versions


GSS34imputed$`Confidence police rescaled` %>% tabyl
t1W <- svyttest(`Confidence police rescaled`~Ethnicity2, design = GSS34imputed.weighted)
#t2W <- svyttest(`Confidence police rescaled`~Ethnicity, design = GSS34imputed.weighted)
t2B <- svyttest(`Confidence police rescaled`~BlackWhite, design = GSS34imputed.weighted)
t2I <- svyttest(`Confidence police rescaled`~IndigenousWhite, design = GSS34imputed.weighted)
t2V <- svyttest(`Confidence police rescaled`~oPoCWhite, design = GSS34imputed.weighted)
#t2W <- svyglm(`Confidence police rescaled`~Ethnicity, design = GSS34imputed.weighted)#This does not graph
t3W <- svyttest(`Confidence police rescaled`~Gender, design = GSS34imputed.weighted)
t4W <- svyttest(`Confidence police rescaled`~Income, design = GSS34imputed.weighted)
t5W <- svyttest(`Confidence police rescaled`~`Place of birth`, design = GSS34imputed.weighted)
# tW_list <- list(t1W, t2W, t3W, t4W, t5W)
tW_list <- list(t1W, t2B, t2I, t2V, t3W, t4W, t5W)

t_GSS34W <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tW_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_GSS34W <- rbind(t_GSS34W, vec)
}

GSS34$Income %<>% fct_rev() 
GSS35$Income %<>% fct_rev() #ALSO
GSS34 %<>% rename("ResponseId"="recid")
GSS34 <- left_join(GSS34, GSS34imputed %>% select(ResponseId, `Confidence police rescaled`, `Confidence police orig`), by = "ResponseId")
GSS34unimputed.weighted <- survey::svydesign(ids= ~WGHT_PER, weights = ~WGHT_PER, data = GSS34) # ids=~to the Weights variable
t1WU <- svyttest(`Confidence police rescaled`~Ethnicity2, design = GSS34unimputed.weighted)
t2BU <- svyttest(`Confidence police rescaled`~BlackWhite, design = GSS34unimputed.weighted)
t2IU <- svyttest(`Confidence police rescaled`~IndigenousWhite, design = GSS34unimputed.weighted)
t2VU <- svyttest(`Confidence police rescaled`~oPoCWhite, design = GSS34unimputed.weighted)
#t2W <- svyglm(`Confidence police rescaled`~Ethnicity, design = GSS34imputed.weighted)#This does not graph
t3WU <- svyttest(`Confidence police rescaled`~Gender, design = GSS34unimputed.weighted)
t4WU <- svyttest(`Confidence police rescaled`~Income, design = GSS34unimputed.weighted)
t5WU <- svyttest(`Confidence police rescaled`~`Place of birth`, design = GSS34unimputed.weighted)
# tW_list <- list(t1W, t2W, t3W, t4W, t5W)
tWU_list <- list(t1WU, t2BU, t2IU, t2VU, t3WU, t4WU, t5WU)
t_GSS34WU <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tWU_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_GSS34WU <- rbind(t_GSS34WU, vec)
}
t_GSS34WU %<>% mutate(Variable = c("Grouped PoC (ref White)", "Black (ref White)", "Indigenous (ref White)", "other PoC (ref White)", "Gender", "Income", "Place of birth"), 
                  Model = "GSS 2019 unimputed", Outcome = "Single-item")
names(t_GSS34WU) = c("mean", "lower", "upper", "Variable", "Model", "Outcome")

t_GSS34W %<>% mutate(Variable = c("Grouped PoC (ref White)", "Black (ref White)", "Indigenous (ref White)", "other PoC (ref White)",
                               "Gender", "Income", "Place of birth"), 
                  Model = "GSS 2019", Outcome = "Single-item")

names(t_GSS34W) = c("mean", "lower", "upper", "Variable", "Model", "Outcome")


#cairo_pdf("Figure 1 GSS V 2019_releveled income.pdf", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 
t_GSS34WU %>%
  ggplot(aes(x = Variable %>%fct_relevel("Income", "Gender", "Place of birth", "other PoC (ref White)","Indigenous (ref White)","Black (ref White)","Grouped PoC (ref White)"), y = mean, color =  Model)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police")+ #adjust limits accordingly
  scale_x_discrete(name = "", labels=c("Place of birth" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref Above)",
                            "Gender" = "Gender (ref male)",
                            #"Ethnicities" = "4-category\nEthnicities (ref White)",
                            "Binary Ethnicity" = "Person of colour\n(ref White)"))


```


```{r T Test data}
ttestbind <- bind_rows(t_GSS34W, t_GSS34WU, t_ICCW, t_ICCWU, t_GSS35W, t_GSS35WU)

setwd("//mcg-main/equipes/Projet 7107/Isadora/")
ttests <- readxl::read_xlsx("Revise and resubmit/Jan 2025/Ttests single item DVs.xlsx") %>% select(Estimate:Outcome)

ttests %>%
  ggplot(aes(x = Variable %>% fct_relevel("Income", "Gender", "Place of birth", "other PoC (ref White)","Indigenous (ref White)","Black (ref White)","Grouped PoC (ref White)"), y = Estimate, color = Imputed)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=Estimate), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_color_manual(values = c("grey35", "blue"))+
  facet_wrap(Data %>% fct_relevel("GSS 2019", "ICC ED 2020", "GSS 2020")~.)+
  theme+
   theme(axis.text.x = element_text(size = 14, angle = 90), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  scale_y_continuous(name = "Confidence in Police")+ #adjust limits accordingly
  scale_x_discrete(name = "", labels=c("Place of birth" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref Above)",
                            "Gender" = "Women, trans &\nnon-binary (ref male)",
                            #"Ethnicities" = "4-category\nEthnicities (ref White)",
                            "Grouped PoC (ref White)" = "Person of colour\n(ref White)"))

ttests$Variable %>% unique
```

#NO protesting data for  2019
```{r Regressions}
GSS34 <- read_rds("GSS_V_2019.R")
glimpse(GSS34imputed)

#DV: Police confidence 
GSS34StatsCanWeights <- GSS34imputed %$% lm(`standardized police confidence`~ Ethnicity+ Income +`Place of birth`+Gender +
                   Cohort + Regions + Education, weights = WGHT_PER) 
GSS34RakedWeights <- GSS34imputed %$% lm(`standardized police confidence`~ Ethnicity+ Income +`Place of birth`+Gender +
                   Cohort + Regions + Education, weights = WeightsRace) 
AgesqGSS34StatsCanWeights <- GSS34imputed %$% lm(`standardized police confidence`~ Ethnicity+ Income +`Place of birth`+Gender + Age+ (Age^2) + Regions + Education, weights = WGHT_PER) 
AgesqGSS34RakedWeights <- GSS34imputed %$% lm(`standardized police confidence`~ Ethnicity+ Income +`Place of birth`+Gender +
                   Age +(Age^2) + Regions + Education, weights = WeightsRace) 
BINGSS34StatsCanWeights <- GSS34imputed %$% lm(`standardized police confidence`~ Ethnicity2+ Income +`Place of birth`+Gender +
                   Cohort + Regions + Education, weights = WGHT_PER) 
BINGSS34RakedWeights <- GSS34imputed %$% lm(`standardized police confidence`~ Ethnicity2+ Income +`Place of birth`+Gender +
                   Cohort + Regions + Education, weights = WeightsRace) 
levels(GSS34imputed$Ethnicity) %>% sprintf()
stargazer(GSS34StatsCanWeights, GSS34RakedWeights, type = "latex", title = "Standardized police confidence DV, OLS", column.labels = c("Statscan Weight", "Raked Weight"), 
          covariate.labels=c("Police confidence (std)", "Black", "Indigenous", "Other Person of Colour", 
                             "Below poverty line", "Born outside Canada", "WT&NB", "Gen X", "Boomer", "Silent", #"Age",  "$Age^2$",
                             "Prairies", "BC", "Atlantic", "North", "Quebec",
                             "Medium Ed", "High Ed"))
stargazer(BINGSS34StatsCanWeights, BINGSS34RakedWeights, type = "latex", title = "Standardized police confidence DV, Binary race IV", column.labels = c("Statscan Weight", "Raked Weight"), covariate.labels=c("Police confidence (std)", "Persons of Colour (aggregated)", 
                             "Below poverty line", "Born outside Canada", "WT&NB", "Gen X", "Boomer", "Silent", #"Age",  "$Age^2$",
                             "Prairies", "BC", "Atlantic", "North", "Quebec",
                             "Medium Ed", "High Ed"))
stargazer(AgesqGSS34StatsCanWeights, AgesqGSS34RakedWeights, type = "latex", title = "Standardized police confidence DV, Age squared", column.labels = c("Statscan Weight", "Raked Weight"), 
          covariate.labels=c("Police confidence (std)", "Black", "Indigenous", "Other Person of Colour", 
                             "Below poverty line", "Born outside Canada", "WT&NB", "Age(sq)",
                             "Prairies", "BC", "Atlantic", "North", "Quebec",
                             "Medium Ed", "High Ed"))




## DV: protesting, NA for this
library(rms)
library(nnet)
GSS34imputed$Protesting %<>% recode("None" = 0, "Yes" = 1)
mod.protest <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = GSS34imputed, family=binomial)
mod.protest2 <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Cohort + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = GSS34imputed, family=binomial)

GSS34imputed %<>% mutate(Agesq=Age^2)
mod.protestAge <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Age+ Agesq + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = GSS34imputed, family=binomial)
mod.protestAge2 <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Age+ Agesq + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = GSS34imputed, family=binomial)

mod.votechoice <- multinom(VoteFor ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education+PID, weights = WeightsRace, Hess = TRUE, data = GSS34imputed)
mod.votechoice2 <- multinom(VoteFor ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Cohort + Regions + Education+PID, weights = WeightsRace, Hess = TRUE, data = GSS34imputed)

mod.pid <- multinom(PID ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education, weights = WeightsRace, Hess = TRUE,data = GSS34imputed)
mod.pid2 <- multinom(PID ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Cohort + Regions + Education, weights = WeightsRace, Hess = TRUE,data = GSS34imputed)

GSS34imputed$VotedB %<>% recode("No" = 0, "Yes" = 1)
mod.voted <- Glm(VotedB == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = GSS34imputed, family=binomial)
mod.voted2 <- Glm(VotedB == 1 ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Cohort + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = GSS34imputed, family=binomial)

mod.votedAge <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Age+ Agesq + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = GSS34imputed, family=binomial)
mod.votedAge2 <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Age+ Agesq + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = GSS34imputed, family=binomial)

save(mod.protest, mod.protest2, mod.pid, mod.pid2, mod.voted, mod.voted2, mod.votechoice, mod.votechoice2,
     mod.protestAge, mod.protestAge2,mod.votedAge, mod.votedAge2,
     file = "EffectsModelsSOURCE.RData")

# For stargazer to run, load the libraries and save without rms:: or nnet:: but avoid otherwise because rms will mask necessary packages so once you have saved the models you could restart the session and load(EffectsModelsSOURCE.RData) without library(rms) or (nnet) loaded

library(stargazer)
stargazer(mod.votechoice, mod.votechoice2, type = "html", out = "votechoiceSOURCE.html")

stargazer(mod.voted, mod.voted2, mod.protest, mod.protest2, title = "Logit models: Effect on protest and voting behavior",
          align=TRUE, 
          no.space=TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001), GSS34 = FALSE, type = "latex", 
 covariate.labels=c("Police confidence (std)", "Police confidence (std)", 
    "Person of colour",
    #Please add the Ethnicity order labels, ref should be "White". Double check by making the stargazer without running covariate.labels() argument first.
 "Below poverty line",
          "Male", "Born outside Canada", "Gen X", "Boomer", "Older", #"Age",  "$Age^2$",
 "Prairies", "BC", "Atlantic", "Quebec",
          "Medium Education", "High Education", "PID: Other/None", "Bloc", "Conservative", "Green", "NDP"))

stargazer(mod.pid, mod.pid2,
          title = "PID effect (reference party: Liberal)",
          align=TRUE, 
          no.space=TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001), GSS34 = FALSE, type = "html", out="Effects_PID.html", 
 covariate.labels=c("Police confidence", "PoC (ref White)", 
                        #Please add the Ethnicity order labels, ref should be "White". Double check by making the stargazer without running covariate.labels() argument first.
                    "Above poverty line",
          "Male", "Born outside Canada", "Gen X (ref GenZ+Millen)", "Boomer", "Older",
          "Prairies (ref Ontario)", "BC", "Atlantic", "Quebec",
          "Medium Education", "High Education"))

stargazer(mod.votechoice, 
          title = "Party vote choice effect (reference party: Liberal)",
          align=TRUE, 
          no.space=TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001), GSS34 = FALSE, type = "latex", #out="Effects_VoteFor.html",
 covariate.labels=c("Police confidence", "Person of colour", "Below poverty line", "Male", "Born outside Canada", "Gen X", "Boomer", "Older", 
                             "Prairies (ref Ontario)", "BC", "Atlantic", "Quebec",
          "Medium", "High","Other/none", "Bloc", "Conservative", "Green", "NDP"))

```


```{r Fig 2}
cairo_pGSS34("Pr protest.pGSS34", onefile = TRUE)
mod.protest%>%
  ggeffect(terms = c("stdPoliceConfidence[all]")) %>% 
  ggplot(., aes(x, predicted))+
  geom_smooth(se = TRUE, method = "loess", color = "black")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25, fill = "grey25")+
  theme+
  xlab("Confidence in Police")+
  ylab("Probability of protesting")+
  ggtitle("")

mod.protest%>%  #Check if this works
  ggeffect(terms = c("stdPoliceConfidence[all]", Ethnicity)) %>% 
  ggplot(., aes(x, predicted, color = Ethnicity))+
  geom_smooth(se = TRUE, method = "loess")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25, fill = "grey25")+
  theme+
  xlab("Confidence in Police")+
  ylab("Probability of protesting")+
  ggtitle("")

mod.protest2%>%  #This model now contains PID
  ggeffect(terms = c("stdPoliceConfidence[all]", Ethnicity2)) %>% 
  ggplot(., aes(x, predicted, color = Ethnicity))+
  geom_smooth(se = TRUE, method = "loess")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25, fill = "grey25")+
  theme+
  xlab("Confidence in Police")+
  ylab("Probability of protesting")+
  ggtitle("")

dev.off()
#Record results
```

Run twice, once with Ethnicity and once with Ethnicity2
```{r Fig 3}
`Quebec parties` <- c("#33B2CC", #Bloc
"#1A4782",#Conservative
"#3D9B35",#Green 
"#D71920",#Liberal
"#F37021", #NDP
"gray")
`ROC parties` <- c("#1A4782",#Conservative
                   "#3D9B35",#Green 
                   "#D71920",#Liberal
                   "#F37021", #NDP
                   "gray")

mod.voteROC <- multinom(VoteFor #mod.pidROC <- multinom(PID
                        ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+
                          Cohort + Regions + Education +PID, weights = WeightsRace, Hess = TRUE,data = GSS34imputed %>% filter(Regions !="Quebec"))

mod.votecQC <- multinom(VoteFor ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+
                          Cohort  + Education+PID, weights = WeightsRace, Hess = TRUE,data = GSS34imputed %>% filter(Regions =="Quebec"))

mod.voteROC2 <- multinom(VoteFor #mod.pidROC <- multinom(PID
                         ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+
                           Cohort + Regions + Education +PID, weights = WeightsRace, Hess = TRUE,data = GSS34imputed %>% filter(Regions !="Quebec"))

mod.votecQC2 <- multinom(VoteFor ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+
                           Cohort  + Education+PID, weights = WeightsRace, Hess = TRUE,data = GSS34imputed %>% filter(Regions =="Quebec"))

cairo_pGSS34("Pr Vote ROC.pGSS34", onefile = TRUE)

#Change model mod.votecQC etc then rerun
mod.voteROC %>% ggeffect(terms = "StandardizedPoliceConfidence[all]") %>% 
  mutate(Party = recode(response.level, "Other.None"="Other/None")) %>%
  ggplot(., aes(x, predicted))+ 
  geom_line(aes(color = Party))+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = Party), alpha = .25)+
  guides(color = guide_legend(title = "Party", nrow = 2))+ #if facets are off
  guides(fill = "none")+
  scale_color_manual(values = `ROC parties`)+
  scale_fill_manual(values = `ROC parties`)+
  #facet_wrap(~Party)+
  theme+
  ylab("Predicted probability of Vote for")+
  xlab("Confidence in Police")+
  ggtitle("model name used")
dev.off()
```


#GSS35
```{r setup, include=FALSE}
# Clear environment
rm(list = ls())
getwd()
setwd("T:/Projet 7107")
# Get packages
#.libPaths("L:/_Rpackages/4.0") #change to .libpaths()
#.libPaths("T:/Projet 7107/_Rpackages/") # ggstatsplot keep checking dependencies
# .libPaths("T:/Projet 7107/_Rpackages/) #ggstatsplot keep checking dependencies
.libPaths("//mcg-main/equipes/Projet 7107/_Rpackages")
# library()


# Set chunk options
knitr::opts_chunk$set(message = FALSE, include = FALSE)
# Set working directory
setwd("//mcg-main/equipes/Projet 7107/Isadora/GSS SI 2020/")

#

# Load packages
library(ggeffects)
library(survey)
# Import data
#GSS35 <- haven::read_dta("S:/ESG - GSS 2020/GSS_ESG_35_v1/data_donnees/stata/gss_esg_35_analm_f1_v1.dta") old data
#GSS35 <- haven::read_dta("S:/GSS_ESG_35/GSS_ESG_35_v1/data_donnees/stata/gss_esg_35_analm_f1_v1.dta") old data
GSS35 <- haven::read_dta("S:/GSS_ESG_35/GSS_ESG_35_v2/data_donnees/stata/gss_esg_35_analm_f3_v2.dta")

# Check
head(GSS35)
dim(GSS35)

# # Eventually
GSS35imputed <- read_rds("GSS_V_2020_imputed.R")
GSS35 <- read_rds("GSS_V_2020.R")

theme <- theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
       aspect.ratio = 1,
       legend.position="top", #legend.position="right",
       panel.background = element_rect(fill = "white"),
       #plot.margin = margin (0.1,0.1,0.1,0.1, "cm"),
       strip.background = element_blank(),
       panel.grid.minor=element_blank(),
       panel.grid.major= element_line(color = "seashell1"),
       plot.background=element_blank(),
       title = element_text(size = 26),
#    axis.text.x = element_blank(), #remove tickmarks
    axis.title.x = element_text(size = 20, colour = "black"),
    axis.title.y = element_text(size = 20, colour = "black"),
    legend.title = element_text(size = 18, colour = "black"),
    legend.text = element_text(size = 16, colour = "black"),
axis.text = element_text(size = 16))
```

#Recode Variables

##DVs
```{r DV standardized confidence}
# Examine trust in police variable
str(GSS35$CII_10)
table(GSS35$CII_10)

# Note that they are coded such that 1 = no confidence and 5 = a great deal of confidence and 9 = Not stated

# Recode "Not stated" category as NA
GSS35 %<>%  
  mutate(CII_10 = ifelse(CII_10 == 9, NA, CII_10), # Police
         CII_15 = ifelse(CII_15 == 9, NA, CII_15), # Justice system and courts
         CII_30 = ifelse(CII_30 == 9, NA, CII_30), # School system
         CII_40 = ifelse(CII_40 == 9, NA, CII_40), # Federal Parliament
         CII_45 = ifelse(CII_45 == 9, NA, CII_45), # Banks
         CII_50 = ifelse(CII_50 == 9, NA, CII_50), # Major corporations
         CII_55 = ifelse(CII_55 == 9, NA, CII_55), # Local merchants and business people
         CII_60 = ifelse(CII_60 == 9, NA, CII_60)) # Canadian media

# Check
GSS35 %>% select(starts_with("CII_")) %>% map(~tabyl(.) %>% adorn_pct_formatting(digits = 2))

# Create standardized police variable
GSS35 %<>%
  mutate(`standardized police confidence`= 
                   scales::rescale(CII_10 - (select(., starts_with("CII_")) %>% rowMeans(na.rm = TRUE))))

# Check
summary(GSS35$`standardized police confidence`)
```


```{r additiona confidence institutions}
#Re-scale to flip directionality and make 0 to 1 ,#Orig answers #CIP_10 1 means "no confidence" and 5 means " a great deal of trust"
GSS35$`Confidence police rescaled` <-  recode(GSS35$CII_10,
                "1" = 0,
                "2" = 1/4,
                "3" = 2/4,
                "4" = 3/4,
                "5" = 1,
         .missing = NA_real_, .default = NA_real_)


GSS35$`Confidence police rescaled` %>% tabyl

GSS35$`Confidence police orig` <- recode_factor(GSS35$CII_10,
                "1" = "No trust at all",
                "2" = "2",
                "3" = "3",
                "4" = "4",
                "5" = "A great deal",
                "9" = NA_character_,
           .missing = NA_character_)

GSS35 %>% tabyl(`Confidence police orig`, `Confidence police rescaled`)
View(GSS35imputed)
GSS35 %<>%  
  mutate(IncomeFixed = case_when(
   INCG1 ==1 ~ "Below poverty line",
   INCG1 %in% c(2:7) ~ "Above poverty", # Recode "Valid skip" category as NA
   TRUE ~ "Missing"))
GSS35$IncomeFixed %>% tabyl()

GSS35imputed <- left_join(GSS35imputed, GSS35 %>% select(recid, `Confidence police rescaled`, `Confidence police orig`, IncomeFixed), by = "recid")
GSS35imputed$Regions %>% tabyl
GSS35imputed$Regions %<>% recode("Prairie" = "West")
GSS35imputed %<>% rename("ResponseId"="recid")
GSS35imputed$diffscore <- GSS35imputed$`Confidence police rescaled`-GSS35imputed$`standardized police confidence`
GSS35imputed$IncomeFixed %>% tabyl()

```

```{r Fig 1 and 2}
#missingness summaries
colnames(GSS35imputed)
GSS35%$%summary(age)
GSS35imputed%$%summary(Age)
GSS35imputed%$%summary(Regions)
GSS35imputed%$%summary(diffscore)
GSS35imputed%$%summary(`standardized police confidence`)
GSS35imputed$Gender %<>% fct_rev #Male needs to be reference category

GSS35imputed %>% select(-ResponseId) %>% select_if(is.factor) %>% map(~(tabyl(.) %>% adorn_totals("row") %>% adorn_pct_formatting()))

GSS35imputed %>% select(-ResponseId) %>% select_if(is.numeric) %>% map(~(summary(.)))

GSS %>% tabyl(Income, IncomeFixed)
GSS35imputed %>% mutate(diffscore = `Confidence police rescaled`-`standardized police confidence`, 
                        #Category = recode(Gender, "Women trans and non-binary"="WTNB")) %>% 
                     ) %>% rename(Category = Ethnicity) %>% #change variable INCOMEFIXED
  drop_na(diffscore) %>% 
  group_by(Category) %>% 
  group_modify(~ mean_se(.x$diffscore, mult = 1.96)) %>% 
  ggplot(aes(x = Category, y = y))+ #If cat = `Confidence police orig` Category %>% fct_rev() or fct_relevel("Black", after = 3L) 
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25, position = position_dodge(0.9)) +
#  geom_col(position = position_dodge(0.9)) + 
    geom_point(position = position_dodge(0.9), size=4, shape=21)+
#    geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_y_continuous("confidence in 0 to 1 scale"#, limits = c(0, NA)
                       )+ #yscale contiuous labels = scales::percent
  scale_x_discrete("Ethnicity")+ #Ethnicity, Police confidence response
  #facet_wrap(Survey~.)+
  theme(axis.text.x = element_text(size = 16))+
  ggtitle("Difference between single-item & \nstandardized confidence in police")
          #, subtitle = "based on substantive response") 


#Go to raked data making

#Standardized version
t1s <- svyttest(`standardized police confidence`~Ethnicity, design = GSS35imputed.rake.trimmed)
t2s <- svyttest(`standardized police confidence`~Gender, design = GSS35imputed.rake.trimmed) 
t3s <- svyttest(`standardized police confidence`~Income, design = GSS35imputed.rake.trimmed)
t4s <- svyttest(`standardized police confidence`~`Place of birth`, design = GSS35imputed.rake.trimmed)
ts_list <- list(t1s, t2s, t3s, t4s)


t_dfs <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in ts_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_dfs <- rbind(t_dfs, vec)
}

t_dfs %<>% mutate(Variable = c("Ethnicity", "Gender",  "Income", "PoB"), 
                  Estimate = "Normalized")

names(t_dfs) = c("mean", "lower", "upper", "Variable", "Police confidence \nestimate")

# cairo_pdf("Figure 1.pdf", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 
t_dfs %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  `Police confidence \nestimate`)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
   labs(x = "") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police GSS35")+ #adjust limits accordingly
  scale_x_discrete(labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "WTNB (ref Male)")) 

#dev.off() #If cairo_pdf was created.

##Recode for rescaled unstandardized

t1u <- svyttest(`Confidence police rescaled`~Ethnicity, design = GSS35imputed.rake.trimmed)
t2u <- svyttest(`Confidence police rescaled`~Gender, design = GSS35imputed.rake.trimmed)
t3u <- svyttest(`Confidence police rescaled`~Income, design = GSS35imputed.rake.trimmed)
t4u <- svyttest(`Confidence police rescaled`~`Place of birth`, design = GSS35imputed.rake.trimmed)
tu_list <- list(t1u, t2u, t3u, t4u)

t_dfu <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tu_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_dfu <- rbind(t_dfu, vec)
}

t_dfu %<>% mutate(Variable = c("Ethnicity", "Gender", "Income", "PoB"), 
                  Estimate = "Single item")

names(t_dfu) = c("mean", "lower", "upper", "Variable", "Police confidence \nestimate")

# cairo_pdf("Figure 1.pdf", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 
t_dfu %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  `Police confidence \nestimate`)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
   labs(x = "", y = "") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police GSS35")+ #adjust limits accordingly , limits = c(-.17, .03)
  scale_x_discrete(labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "WTNB (ref Male)"))

bind_rows(t_dfs, t_dfu) %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  `Police confidence \nestimate`)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
   labs(x = "") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police, GSS35")+ #adjust limits accordingly , limits = c(-.17, .03)
  scale_x_discrete(labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "WTNB (ref Male)"))
#dev.off() #If cairo_pdf was created.

SIGSS35StatsCanWeights <- GSS35imputed %$% lm(`Confidence police rescaled`~ Ethnicity+ Income +`Place of birth`+Gender +
                                           Cohort + Regions + Education, weights = WGHT_PER) 
SIGSS35RakedWeights <- GSS35imputed %$% lm(`Confidence police rescaled`~ Ethnicity+ Income +`Place of birth`+Gender +
                                        Cohort + Regions + Education, weights = WeightsRace) 

GSS35StatsCanWeights <- GSS35imputed %$% lm(`standardized police confidence`~ Ethnicity+ Income +`Place of birth`+Gender +
                                         Cohort + Regions + Education, weights = WGHT_PER) 
GSS35RakedWeights <- GSS35imputed %$% lm(`standardized police confidence`~ Ethnicity+ Income +`Place of birth`+Gender +
                                      Cohort + Regions + Education, weights = WeightsRace) 

SIBINGSS35StatsCanWeights <- GSS35imputed %$% lm(`Confidence police rescaled`~ Ethnicity2+ Income +`Place of birth`+Gender +
                                            Cohort + Regions + Education, weights = WGHT_PER) 
SIBINGSS35RakedWeights <- GSS35imputed %$% lm(`Confidence police rescaled`~ Ethnicity2+ Income +`Place of birth`+Gender +
                                         Cohort + Regions + Education, weights = WeightsRace) 

BINGSS35StatsCanWeights <- GSS35imputed %$% lm(`standardized police confidence`~ Ethnicity2+ Income +`Place of birth`+Gender +
                                            Cohort + Regions + Education, weights = WGHT_PER) 
BINGSS35RakedWeights <- GSS35imputed %$% lm(`standardized police confidence`~ Ethnicity2+ Income +`Place of birth`+Gender +
                                         Cohort + Regions + Education, weights = WeightsRace) 

stargazer(GSS35StatsCanWeights, SIGSS35StatsCanWeights, column.labels = c("Standardized", "Single-item"), type = "latex",
            title = "GSS35",
          covariate.labels=c("Black", "Indigenous", "Other PoC", 
                             "Below poverty line", "Born outside Canada", "WT&NB", 
                             "Gen X", "Boomer", "Silent", #"Age",  "$Age^2$",
                             "West", "BC", "Atlantic", "Quebec",
                             "Medium Ed", "High Ed"), notes = "Statscan weights")

stargazer(GSS35RakedWeights, SIGSS35RakedWeights,column.labels = c("Standardized", "Single-item"), type = "latex", 
          title = "GSS35",
          covariate.labels=c("Black", "Indigenous", "Other PoC", 
                             "Below poverty line", "Born outside Canada", "WT&NB", 
                             "Gen X", "Boomer", "Silent", #"Age",  "$Age^2$",
                             "West", "BC", "Atlantic", "Quebec",
                             "Medium Ed", "High Ed"), notes = "Raked weights")

stargazer(GSS35StatsCanWeights, SIGSS35StatsCanWeights, type = "latex",
          title = "Standardized police confidence DV, OLS", column.labels = c("Statscan Weight", "Raked Weight"),
          covariate.labels=c("Black", "Indigenous", "Other Person of Colour", 
                             "Below poverty line", "Born outside Canada", "WT&NB", 
                             "Gen X", "Boomer", "Silent", #"Age",  "$Age^2$",
                             "Prairies", "BC", "Atlantic", "Quebec",
                             "Medium Ed", "High Ed"))

stargazer(BINGSS35StatsCanWeights, BINGSS35RakedWeights, 
          type = "latex", title = "Standardized police confidence DV, Binary race IV", column.labels = c("Statscan Weight", "Raked Weight"), 
          covariate.labels=c("Persons of Colour (grouped)", 
                             "Below poverty line", "Born outside Canada", "WT&NB", "Gen X", "Boomer", "Silent", #"Age",  "$Age^2$",
                             "Prairies", "BC", "Atlantic", "North", "Quebec",
                             "Medium Ed", "High Ed"))


#Protest analysis
GSS35imputed %>% tabyl(Protesting, Ethnicity) %>% adorn_percentages("all") %>%  adorn_pct_formatting(digits = 2) %>%
  adorn_title(row_name = "Protested", col_name = "Ethnicity") 
GSS35imputedREDUX$Protesting %>% tabyl()
GSS35imputed %>% tabyl(Protesting)
GSS35imputedREDUX$Protesting %<>% recode("No" = 0, "Yes" = 1)
library(rms)
library(nnet)
GSS35imputed %<>% rename(POB=`Place of birth`, stdPoliceConfidence=`standardized police confidence`, ConfpolR=`Confidence police rescaled`)
GSS35imputed %<>% rename(`Place of birth`=, `standardized police confidence`=stdPoliceConfidence, `Confidence police rescaled`=ConfpolR)
GSS35imputed$`Confidence police rescaled` %>% summary()

stdmod.protest <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education, x=TRUE, y=TRUE, weights = WeightsRace, data = GSS35imputed, family=binomial)

simod.protest <- Glm(Protesting == 1 ~ ConfpolR + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education , x=TRUE, y=TRUE, weights = WeightsRace, data = GSS35imputed, family=binomial)

stargazer(stdmod.protest, simod.protest, title = "Logit models: Confidence on police regressed on protest",
          align=TRUE, 
          no.space=TRUE, p.auto = TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001), 
          type = "text",
          column.labels = c("Standardized", "Single-item"),
          #type = "html", out = "ProtestGSS2020.html", 
          # type = "latex", out = "ProtestGSS2020.tex", 
 covariate.labels=c("Police confidence", "Police confidence",
"Black",
"Indigenous",
"Other Person of Colour (ref White)",
 "Below poverty line",
          "WTNB", "Born outside Canada", "Gen X", "Boomer", "Older", 
"West", "BC", "Atlantic", "Quebec",
          "Medium Education", "High Education", "Constant"))


#cairo_pdf("Pr protest.pdf", onefile = TRUE)
library(ggeffects)
stdmod.protest%>%
  ggeffect(terms = c("stdPoliceConfidence[all]")) %>% 
  ggplot(., aes(x, predicted))+
  geom_smooth(se = TRUE, method = "loess", color = "black")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25, fill = "grey25")+
  theme+
  xlab("std Confidence in Police")+
  ylab("Probability of protesting, no PID")+
  ggtitle("")

simod.protest%>%
  ggeffect(terms = c("ConfpolR[all]")) %>% 
  ggplot(., aes(x, predicted))+
  geom_smooth(se = TRUE, method = "loess", color = "black")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25, fill = "grey25")+
  theme+
xlab("single-item\nConfidence in Police")+
  ylab("Probability of protesting")+
  ggtitle("")

simod.protest%>%
  ggeffect(terms = c("ConfpolR[all]")) %>% 
  ggplot(., aes(x, predicted))+
  geom_smooth(se = TRUE, method = "loess", color = "black")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25, fill = "grey25")+
  theme+
xlab("Confidence in Police")+
  ylab("Probability of protesting")+
  ggtitle("")


```


```{r Protesting}
# REP_05 How interested are you in politics? (Do not include!)

# In the past 12 months, have you done any of the following activities? 
# REP_10 Political activity - Past 12 months - Searched for information
# REP_20 Political activity - Past 12 months - Volunteered for political party
# REP_30 Political activity - Past 12 mo. - Expressed views - News/politician
# REP_35 Political activity - Past 12 months - Expressed views - Internet
# REP_40 Political activity - Past 12 months - Signed paper petition
# REP_45 Political activity - Past 12 months - Signed Internet petition
# REP_50 Political activity - Past 12 months - Product choice ethical reasons
# REP_60 Political activity - Past 12 months - Attended public meeting
# REP_70 Political activity - Past 12 months - Spoke at public meeting
# REP_80 Political activity - Past 12 months - Participated in a demonstration
# REP_85 Political activity - Past 12 months - Visible sign of support

# Note that all participation variables are coded such that 1) Yes 2) No

# Examine political activity variables
apply(GSS35[c(162:172)], 2, table)

# Recode such that 1, Yes, 0, No and everything else NA
GSS35 %<>% 
  mutate(REP_10 = ifelse(REP_10 %in% c(6:9), NA, REP_10),
         REP_10 = ifelse(REP_10 == 2, 0, 1),
         REP_20 = ifelse(REP_20 %in% c(6:9), NA, REP_20),
         REP_20 = ifelse(REP_20 == 2, 0, 1),
         REP_30 = ifelse(REP_30 %in% c(6:9), NA, REP_30),
         REP_30 = ifelse(REP_30 == 2, 0, 1),
         REP_35 = ifelse(REP_35 %in% c(6:9), NA, REP_35),
         REP_35 = ifelse(REP_35 == 2, 0, 1),
         REP_40 = ifelse(REP_40 %in% c(6:9), NA, REP_40),
         REP_40 = ifelse(REP_40 == 2, 0, 1),
         REP_45 = ifelse(REP_45 %in% c(6:9), NA, REP_45),
         REP_45 = ifelse(REP_45 == 2, 0, 1),
         REP_50 = ifelse(REP_50 %in% c(6:9), NA, REP_50),
         REP_50 = ifelse(REP_50 == 2, 0, 1),
         REP_60 = ifelse(REP_60 %in% c(6:9), NA, REP_60),
         REP_60 = ifelse(REP_60 == 2, 0, 1),
         REP_70 = ifelse(REP_70 %in% c(6:9), NA, REP_70),
         REP_70 = ifelse(REP_70 == 2, 0, 1),
         REP_80 = ifelse(REP_80 %in% c(6:9), NA, REP_80),
         REP_80 = ifelse(REP_80 == 2, 0, 1),
         REP_85 = ifelse(REP_85 %in% c(6:9), NA, REP_85),
         REP_85 = ifelse(REP_85 == 2, 0, 1))

# Isadora, is there a cleaner way to code this? I couldn't search it up because there's no internet access here, but let me know if there is! :)

# Check
GSS35 %>% select(starts_with("REP_")) %>% map(~tabyl(.) %>% adorn_pct_formatting(digits = 2))

# GSS35 %>% select(., c(162:172)) # Select all REP_ variables except for REP_05

# Create political participation variable
GSS35 %<>% mutate(`Political participation` =
                      rowMeans(select(., starts_with("REP_")), na.rm = TRUE))

# Check
summary(GSS35$`Political participation`)
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 0.09091 0.27273 0.36364 0.38499 0.45454 9.00000 

tabyl(GSS35$`Political participation`) %>% adorn_pct_formatting(digits = 2)
quantile(GSS35$`Political participation`, na.rm = TRUE)

GSS35 %<>% mutate(`Political participation` = case_when(
  `Political participation` <1/9 ~ "Almost none", 
  between(`Political participation`, 1/9, right = 3  )~ "Casual",
  `Political participation` >= 3  ~ "High",
  TRUE ~ NA_character_) %>% fct_relevel("Almost none", "Casual", "High"))

# Check
tabyl(GSS35$`Political participation`)


# Recode protesting variable
GSS35 %<>% rename("Protesting" = REP_80) # Participated in a demonstration

GSS35 %<>% 
  mutate(Protesting = case_when(
    Protesting == 1 ~ "Yes",
    Protesting == 0 ~ "No",
    TRUE ~ NA_character_
  )) 

# Check
table(GSS35$Protesting)

```

# IVs

```{r Race recode}
# Explore race variables
str(GSS35$vismin) # numeric
tabyl(GSS35$vismin)
tabyl(GSS35$visminfl)

str(GSS35$ABM_01A) # numeric
table(GSS35$ABM_01A)

# Replace numeric code with appropriate category
GSS35 %<>%
  mutate(Ethnicity = case_when(
    ABM_01A == 2 ~ "Indigenous",
    vismin == 3 ~ "Black",
    vismin == 13 & ABM_01A == 1 ~ "White", 
    vismin == 1 | vismin == 2 | vismin == 4 | vismin == 5 | vismin == 6 | vismin == 7 | vismin == 8 | vismin == 9 | vismin == 10 | vismin == 11 | vismin == 12 ~ "Other Person of Colour",
    TRUE ~ NA_character_
  ),
  Ethnicity = factor(Ethnicity, levels = c("White", "Black", "Indigenous", "Other Person of Colour"))) 

# Check recoding
tabyl(GSS35$Ethnicity) %>% adorn_pct_formatting(digits = 2)

# Create another ethnicity variable
GSS35$Ethnicity2 <- GSS35$Ethnicity

# Lump all PoC into one category 
GSS35$Ethnicity2 %<>% 
  fct_lump_n(., n = 1) %>% 
  recode("Other" = "PoC")

# Check
levels(GSS35$Ethnicity2)
tabyl(GSS35$Ethnicity2)
```

```{r Poverty}
# Examine inc variable THIS WAS SUPRESSED we should have been using INCG1
tabyl(GSS35$flagrr)
str(GSS35$inc)
summary(GSS35$inc) # Double check responses are same as GSS Vic
# Income was removed in v1, recode in v2
# Create income variable
GSS35 %<>%  
  mutate(Income = case_when(
   inc <20000 ~ "Below poverty line",
   inc >= 20000 ~ "Above poverty", # Recode "Valid skip" category as NA
   TRUE ~ "Missing"))

GSS35$inc %>% is.na %>%  tabyl()
tabyl(GSS35$Income) %>% adorn_pct_formatting(2)

#GSS35$Income %<>% fct_relevel("Below poverty line", "Above poverty") STUPID 
GSS35imputed2020$Income %<>% fct_infreq()

GSS35$INCG1 %>% tabyl()
GSS35 %<>%  
  mutate(IncomeFixed = case_when(
   INCG1 ==1 ~ "Below poverty line",
   INCG1 %in% c(2:7) ~ "Above poverty", # Recode "Valid skip" category as NA
   TRUE ~ "Missing"))
GSS35$IncomeFixed %>% tabyl()

GSS35imputed %>% tabyl(Income, IncomeFixed) #same categorization actually

```


```{r Immigration}
# Examine place of birth variable
table(GSS35$brthcan)
# Create place of birth variable
GSS35 %<>%  rename("Place of birth" = brthcan) 

GSS35$`Place of birth` %<>% recode("1" = "Canada", 
                                "2" = "Other", 
                                "9" = NA_character_)

# Check
tabyl(GSS35$`Place of birth`) %>% adorn_pct_formatting(2)

# Examine landed immigrant variable
summary(GSS35$IM_04) 

# Create Years in Canada variable
GSS35 %<>% 
  mutate(`Years in Canada` = case_when(
    `Place of birth` == 1 ~ NA_real_, # Born in Canada 
    IM_04 == 9996 ~ NA_real_, # Skipped question
    IM_04 == 9999 ~ NA_real_, # Not stated
    TRUE ~ 2020 - IM_04
  ))

# Check
GSS35 %>% select(IM_04, `Years in Canada`)
summary(GSS35$`Years in Canada`)
```

```{r Gender}
# Examine variable
table(GSS35$gender)

# Rename variable
GSS35 %<>% rename(Gender = gender)

# Recode Gender variable
GSS35 %<>%
  mutate(Gender = case_when(
    Gender == 1 ~ "Male",
    Gender == 2 | Gender == 3 ~ "Women trans and non-binary",
    TRUE ~ NA_character_
  ),
  Gender = factor(Gender, levels = c("Women trans and non-binary", "Male")))

# Check
tabyl(GSS35$Gender) %>% adorn_pct_formatting(2)
```

##Controls

```{r Age and Cohorts}
# Examine variable
summary(GSS35$age) 

# Rename age variable
GSS35 %<>% rename(Age = age)

# Create Cohort variable
GSS35 %<>% 
 mutate(Cohort = case_when(Age %in% c(18:24) ~ "GenZ and Millennial", 
                           Age %in% c(25:40) ~ "GenZ and Millennial", 
                           Age %in% c(41:56) ~ "Gen X", 
                           Age %in% c(57:75) ~ "Ok Boomer", 
                           Age > 75 ~ "Silent", 
                           TRUE ~ NA_character_),
        Cohort = factor(Cohort, levels = c("GenZ and Millennial", "Gen X", "Ok Boomer", "Silent")))

# Check
GSS35%$%summary(Cohort)
tabyl(GSS35$Cohort)%>% adorn_pct_formatting(2)

```


```{r Education}
# Examine variable
table(GSS35$ED_05)

# Create Education variable
GSS35 %<>% mutate( 
    Education = case_when(
      ED_05 %in% c(1:2) ~ "Low", # Up to (including) Completed secondary/ high school
      ED_05 %in%  c(3:5) ~ "Medium", # Trades, technical, college, university certificate/diploma below bachelor's
      ED_05 %in%  c(6:7) ~ "High", # Bachelor's degree and higher
      TRUE ~ NA_character_) %>%  
      fct_relevel(., "Low", "Medium", "High"))

# Check
GSS35 %>% tabyl(Education)%>% adorn_pct_formatting(2)
```


```{r Language}
# Examine
table(GSS35$lanint)

# Rename
GSS35 %<>% rename(Language = lanint) 

# Recode language variable
GSS35 %<>% mutate(Language = ifelse(Language == 1, "English", "French"))

# Check
tabyl(GSS35$Language)%>% adorn_pct_formatting(2)
```


```{r Province}
# Check 
table(GSS35$prv)

# Recode categories (Check recoding)
GSS35$prv %<>% 
  recode("10" = "Newfoundland and Labrador",
         "11" = "Prince Edward Island",
         "12" = "Nova Scotia",
         "13" = "New Brunswick",
         "24" = "Quebec",
         "35" = "Ontario",
         "46" = "Manitoba",
         "47" = "Saskatchewan",
         "48" = "Alberta", 
         "59" = "British Columbia") # Note there are no respondents from the North

# Rename variable
GSS35 %<>% rename(Province = prv)

# Reorder categories
GSS35$Province %<>% fct_relevel(.,"Ontario")

# Check
levels(GSS35$Province)
tabyl(GSS35$Province) %>% adorn_pct_formatting(2)
# Create regions variable
GSS35$Regions <- GSS35$Province
GSS35$Regions %<>% fct_collapse(., "Atlantic" = c("Newfoundland and Labrador","Prince Edward Island", "Nova Scotia", "New Brunswick"), "Prairie" = c("Manitoba","Saskatchewan","Alberta"))

# Check
tabyl(GSS35$Regions)%>% adorn_pct_formatting(2)
```


```{r final prep}
# Select relevant variables
GSS35 %<>% select(., Ethnicity, Ethnicity2, Income, `Place of birth`, Gender, Age, Cohort, Education, Language, Province, Regions, `Political interest`, `standardized police confidence`, Protesting, `Political participation`, WGHT_PER, recid)

# Create datasets
GSS35 %<>% mutate_if(is.character, factor)
write_rds(GSS35, "GSS_V_2020.R")
```

#### Imputation


```{r missingness}
naniar::miss_var_summary(GSS352020) %>% mutate_if(is.numeric, round,2) %>% gt::gt()

```


```{r Imputation process}
# Import data set
GSS35 <- readRDS("GSS_V_2020.R")

# Load package
library(mice)

# Rename variables
GSS35 %<>% rename(PoB = `Place of birth`, PoliticalInterest =`Political interest`, stdPoliceConfidence =`standardized police confidence`, Politicalparticipation = `Political participation`)

colnames(GSS35)

imp <- mice(GSS35, maxit = 0) 
meth <- imp$method
predM <- imp$predictorMatrix

# Here I am specifying which variables should NOT be used for PREDICTION (outcomes, survey weights):
# variables that should NOT be IMPUTED (outcomes, main IV, survey weights):
predM[, c("stdPoliceConfidence", "VoteLikelihood", "VoteLikelihoodB", "Language", "Province", "Regions", "WGHT_PER", "recid")] = 0  

# Diff methods must be used to impute continuous, binary, and ordinal vars. Here I am specifying which methods for the diff variables:
# None for the ones that shouldn't be imputed
meth[c("stdPoliceConfidence", "VoteLikelihood", "VoteLikelihoodB", "Language", "Province", "Regions", "WGHT_PER", "recid")] = ""

# Numeric variables
meth[c("Age")] = "pmm" # numeric
meth[c("Ethnicity2", "Income", "PoB")] = "logreg" # dummy
meth[c("Ethnicity", "Gender")] = "polyreg" # Unordered categorical factor
meth[c("Cohort", "Education")] = "polr" # Ordered categorical factor

#### Run the multiple (m=5) imputation --------
set.seed(1986)

#Select and run the next three lines together to see how quickly the impute
started <- lubridate::now() 
imputed  <-  mice(GSS35, method = meth, predictorMatrix = predM, m = 5) 
ended <- lubridate::now() 
ended-started
# Start: 2022-02-16 11:53:00 PST
# End: 2022-02-16 11:59:08 PST
# Total time: 6 minutes

# Create dataframe after imputation
GSS35imputed <- mice::complete(imputed)

# Check for missingness
test <- GSS35imputed %>% map_GSS35(~(sum(is.na(.x))) #/length(x) #as proportion of length
                   ) %>% as_tibble() %>% transpose(.) %>% simplify_all()
names(test) <- "Imputed Variables"
test
str(GSS35imputed)

# Rename back to tidyverse convention
GSS35imputed %<>% rename(`Place of birth` = PoB, `Political interest`= PoliticalInterest, `standardized police confidence` = stdPoliceConfidence, `Voted B` = Voted, `Vote likelihood` = VoteLikelihood, `Vote likelihood B` = VoteLikelihoodB, `Political participation` = Politicalparticipation)

write_rds(GSS35imputed, "GSS_V_2020_imputed.R")
```


# Weighing data

```{r Reweighting}
# Computing survey weights (CES has: province, gender, age group, and education; we will add: Ethnicity2)
# https://www.r-bloggers.com/survey-computing-your-own-post-stratification-weights-in-r/

# Load library
library(survey)

# Import dataset
GSS35imputed <- read_rds("GSS_V_2020_imputed.R")
#GSS35imputed$Income %<>% fct_infreq() Above should be ref category


# Create the unweighted survey object:
GSS35imputed.unweighted <- survey::svydesign(ids = ~1, data = GSS35imputed) # ids=~1 means we don't have any weights. R gives us a warning saying we're not applying any weights

# Create the population data frames
# Provinces

# Check
table(GSS35imputed$Regions)

# Create provinces data set 
#POPULATION DATA AND POPORTIONS FOR WEIGHTS COMES FROM STATSCAN CENSUS PUBLICL AVAILABLE DATA
provinces <- read_csv("//mcg-main/equipes/Projet 7107/Isadora/census_2016_population.CSV") %>%
  select(`Geographic name`, `Population, 2016`) %>% 
  drop_na(`Population, 2016`)

# Check
head(provinces)

# Create Regions variable
provinces$Regions <- provinces$`Geographic name`

provinces$Regions %<>% fct_collapse(., 
                                    "Atlantic" = c("Newfoundland and Labrador",
                                                   "Prince Edward Island", 
                                                   "Nova Scotia", "New Brunswick"), 
                                    "Prairie" = c("Manitoba","Saskatchewan","Alberta") ,
                                    "North" = c("Northwest Territories", "Yukon", "Nunavut"))

# Check
head(provinces)

# Note that Canada total = 35151728

# Add proportions/frequencies
provinces %<>%
  select(-`Geographic name`) %>%
  filter(Regions != "Canada") %>% 
  group_by(Regions) %>%
  summarise(sum = sum(`Population, 2016`),
            prop = sum(`Population, 2016`)/35151728, #POPULATION DATA AND POPORTIONS FOR WEIGHTS COMES FROM STATSCAN CENSUS PUBLICL AVAILABLE DATA
            freq = nrow(GSS35imputed)*prop)

# Check
head(provinces)

# Region
region.dist <- data.frame(provinces %>% select(Regions, freq) %>% filter(Regions != "North")) # Drop North from calculations because no respondents from the North

region.dist <- data.frame(Regions = c("Ontario", "West", "British Columbia", "Atlantic", "Quebec"), 
                          Freq = nrow(GSS35imputed)*c(0.383, 0.183, 0.132, 0.0664, 0.232)) #0.003231818 if North but no North in GSS35


# Gender
gender.dist <- data.frame(Gender = c("Women trans and non-binary", "Male"),
                          Freq = nrow(GSS35imputed) * c(0.514240946, 0.485759054))

# Cohort
cohort.dist <- data.frame(Cohort = c("GenZ and Millennial", "Gen X", "Ok Boomer", "Silent"),
                       Freq = nrow(GSS35imputed) * c(0.354978037, 
                                                  0.278681465, 
                                                  0.27593917, 
                                                  0.090401327))

# Education
edu.dist <- data.frame(Education = c("Low", "Medium", "High"),
                       Freq = nrow(GSS35imputed) * c(0.352, 0.363, 0.285))


# Race
GSS35imputed$Ethnicity %>% tabyl #Spelling must match
race.dist <- data.frame(Ethnicity = c("White","Black", "Indigenous", "Other Person of Colour"),
                       Freq = nrow(GSS35imputed) * c(0.685, 0.043, .05,0.22)) #All categories

racebin.dist <- data.frame(Ethnicity2 = c("White","PoC"),
                       Freq = nrow(GSS35imputed) * c(0.685, 0.315)) #The category frequencies were flipped

# compute the weights by putting together the marginal distributions from before with our data
library(survey)
GSS35imputed.rake <- rake(design = GSS35imputed.unweighted,
                       sample.margins = list(~Regions, ~Gender, ~Cohort, ~Education,
                                             ~Ethnicity,~Ethnicity2),
                       population.margins = list(region.dist, gender.dist, cohort.dist, edu.dist, race.dist, racebin.dist))


# Trim the weights
summary(weights(GSS35imputed.rake)) # Some of the weights are much too large & too small

GSS35imputed.rake.trimmed <- trimWeights(GSS35imputed.rake, lower = 0.3, upper=3, strict=TRUE) 
summary(weights(GSS35imputed.rake.trimmed)) # Now all the weights range between 0.3 and 3, which is more sensible
str(GSS35imputed.rake.trimmed)

# Compare the data before and after 
prop.table(svytable(~Ethnicity, GSS35imputed.unweighted))
prop.table(svytable(~Ethnicity, GSS35imputed.rake.trimmed))
prop.table(svytable(~Ethnicity2, GSS35imputed.unweighted))
prop.table(svytable(~Ethnicity2, GSS35imputed.rake.trimmed))


# Comparing svy mean and total in raked data
svymean(~Ethnicity, GSS35imputed.rake.trimmed)
svytotal(~Ethnicity, GSS35imputed.rake.trimmed)
svymean(~Ethnicity2, GSS35imputed.rake.trimmed)
svytotal(~Ethnicity2, GSS35imputed.rake.trimmed)

svymean(~Ethnicity, GSS35imputed.unweighted) # and in unweighted data
svytotal(~Ethnicity, GSS35imputed.unweighted)
svymean(~Ethnicity2, GSS35imputed.unweighted) # and in unweighted data
svytotal(~Ethnicity2, GSS35imputed.unweighted)

# Creating weights object in original GSS35imputed dataframe so we can use in our regressions:
GSS35imputed$WeightsRaceOLD <- GSS35imputed$WeightsRace
GSS35imputed$WeightsRace <- weights(GSS35imputed.rake.trimmed)
waldo::compare(GSS35imputed$WeightsRaceOLD, GSS35imputed$WeightsRace)

write_rds(GSS35imputed, "GSS_V_2020_imputed.R") # Make sure you are adding the right weights if there are different census weights available for 2020 or 2021 surveys. 
save(GSS35imputed.rake.trimmed, file = "RakeddataGSS35.RData")

```

## Switching between renames

```{r Quick Renames block for ggeffect and imputation}
GSS35 %<>% rename(PoB = `Place of birth`, PoliticalInterest=`Political interest`, stdPoliceConfidence=`standardized police confidence`)

GSS35 %<>% rename(`Place of birth`= PoB, `Political interest` = PoliticalInterest, `standardized police confidence`= stdPoliceConfidence)

```

# Analysis with finalized cleaned weighted and imputed data

```{r Summaries}
# Clear environment
# rm(list = ls())

# Import data set
GSS35imputed <- readRDS("GSS_V_2020_imputed.R")

# Create mode function
getMode <- function(GSS35) {
  ux <- na.omit(unique(GSS35))
  ux[which.max(tabulate(match(GSS35, ux)))]
}

# Remove id and weights variables
GSS35 <- GSS35imputed %>% 
  select(-recid, -WGHT_PER, -WeightsRace)

Mode <- apply(GSS35 %>% select(where(is.factor)), 2, getMode)

p <- GSS35imputed %>% select_if(is.factor) %>% map_GSS35(~(tabyl(.) %>% adorn_totals("row") %>% adorn_pct_formatting()))
View(p)
p %>% flextable() %>% 
  save_as_docx(path = "univariates2GSSV2020.docx")

GSS35imputed2020 %>% select_if(is.factor) %>% colnames()

GSS35imputed %>% 
  select_if(is.numeric) %>% 
  map(~(sd(.)))

# Create dummy variables
dummies::dummy(GSS35imputed$Cohort) %>% psych::describe(., fast = TRUE)
dummies::dummy(GSS35imputed$Regions) %>% psych::describe(., fast = TRUE)
dummies::dummy(GSS35imputed$Education) %>% psych::describe(., fast = TRUE)

descriptives <- GSS35 %>% 
  summarise_all(.funs = list(
    `Mean or mode` = function(x)ifelse(is.numeric(x), sprintf("%.3f", mean(x, na.rm = TRUE)), as.character(getMode(x))), 
    SD = function(x)ifelse(is.numeric(x), round(sd(x, na.rm = TRUE), 2), NA_real_), 
    `Min or first level` = function(x)ifelse(is.numeric(x), sprintf("%.0f", min(x, na.rm = TRUE)), levels(x)[1]), 
    `Max or last level` = function(x)ifelse(is.numeric(x), sprintf("%.0f", max(x, na.rm = TRUE)), levels(x)[length(levels(x))]), 
    n = function(x)sum(!is.na(x))
  )) %>% 
  pivot_longer(everything(),
        names_to = c("Variable", ".value"),
        names_pattern = "(.+)_(.+)")

# Load library
library(gt)
colnames(GSS35imputed2020)
descriptives %>% flextable() %>% 
  save_as_docx(path = "descriptives GSS V 2020.docx")
descriptives2020 %>% gt::gt(rownames_to_stub = TRUE) %>%
  tab_header(title = "Descriptive statistics", subtitle = "GSS 2020") %>% 
  tab_source_note("SD NA for categorical variables") #%>% gtsave("descriptives GSS V 2020.tex")

```

```{r Saving output}
# One
GSS35 %>% 
  tabyl(`var`) %>% 
  gt() %>% 
  gtsave(filename = "var.tex")

GSS35 %>% 
  tabyl(var) %>% 
  flextable() %>% 
  save_as_docx(path = "var.docx")

# Two/three way
GSS35 %>% 
  tabyl(var1, var2) %>% 
  gt() %>% 
  gtsave(filename = "vars.tex")

GSS35 %>% 
  tabyl(var1, var2) %>%  
  flextable() %>% 
  save_as_docx(path = "vars.docx")

# Print one-way table for every (non-numeric) variable 
GSS35 %>%  
   select(!where(is.numeric)) %>% # Or don't select anything for all, or is.numeric for numeric but that's usually overkill, unless you really think there is a weird distribution we should see. 
   imap(~ {variable <- .y
   tabyl(.) %>% 
     rename_with(~ variable, 1) %>% 
     adorn_pct_formatting(.) %>%
     #gt(.) %>% gtsave(str_c(variable, ".tex"))
     save_as_docx(path = str_c(variable, ".docx"))
    })
```

Note: Figures are saved by running cairo_pGSS35 first, then the ggplot code, then dev.off. Run without cairo/dev.off first to make sure everything runs well, that the titles make sense, etc and flag any issue. No need to keep saving wrong plots. When you are sure they are good to save, run cairo first then turn off saving with dev.off. 

```{r Fig 1 Grouped means differences}
GSS35imputed <- read_rds("GSS_V_2020_imputed.R")
GSS35 <- read_rds("GSS_V_2020.R")
GSS35$Income %<>% fct_rev()

GSS35imputed %<>% mutate(BlackWhite = case_when(Ethnicity == "White" ~ "White",
                                             Ethnicity == "Black" ~ "Black", 
                                             TRUE ~ NA_character_) %>% fct_relevel("White", after = 0L))
GSS35imputed %<>% mutate(IndigenousWhite = case_when(Ethnicity == "White" ~ "White",
                                             Ethnicity == "Indigenous" ~ "Indigenous", 
                                             TRUE ~ NA_character_) %>% fct_relevel("White", after = 0L))
GSS35imputed %<>% mutate(oPoCWhite = case_when(Ethnicity == "White" ~ "White",
                                             Ethnicity == "Other Person of Colour"~ "Other visible minority", 
                                             TRUE ~ NA_character_) %>% fct_relevel("White", after = 0L))
GSS35imputed$BlackWhite %>% tabyl %>% adorn_pct_formatting()
GSS35imputed$IndigenousWhite %>% tabyl %>% adorn_pct_formatting()
GSS35imputed$oPoCWhite %>% tabyl %>% adorn_pct_formatting()
GSS35imputed.weighted <- survey::svydesign(ids= ~WeightsRace, weights = ~WeightsRace, data = GSS35imputed) # ids=~to the Weights variable
#t1W <- svyttest(stdPoliceConfidence~Ethnicity2, design = GSS35imputed) GSS35 is wrong and so is DV name
GSS35imputed.weighted$variables %>% names()
t1W <- svyttest(`standardized police confidence`~Ethnicity2, design = GSS35imputed.weighted)
#t2W <- svyttest(`standardized police confidence`~Ethnicity, design = GSS35imputed.weighted)
t2B <- svyttest(`standardized police confidence`~BlackWhite, design = GSS35imputed.weighted)
t2I <- svyttest(`standardized police confidence`~IndigenousWhite, design = GSS35imputed.weighted)
t2V <- svyttest(`standardized police confidence`~oPoCWhite, design = GSS35imputed.weighted)
t2W <- svyglm(`standardized police confidence`~Ethnicity, design = GSS35imputed.weighted)#This does not graph
t3W <- svyttest(`standardized police confidence`~Income, design = GSS35imputed.weighted)
t4W <- svyttest(`standardized police confidence`~`Place of birth`, design = GSS35imputed.weighted)
t5W <- svyttest(`standardized police confidence`~Gender, design = GSS35imputed.weighted)
tW_list <- list(t1W, t2B, t2I, t2V, #t2W, 
                t3W, t4W, t5W)

t1W <- svyttest(`Confidence police rescaled`~Ethnicity2, design = GSS35imputed.weighted)
#t2W <- svyttest(`Confidence police rescaled`~Ethnicity, design = GSS35imputed.weighted)
t2B <- svyttest(`Confidence police rescaled`~BlackWhite, design = GSS35imputed.weighted)
t2I <- svyttest(`Confidence police rescaled`~IndigenousWhite, design = GSS35imputed.weighted)
t2V <- svyttest(`Confidence police rescaled`~oPoCWhite, design = GSS35imputed.weighted)
t2W <- svyglm(`Confidence police rescaled`~Ethnicity, design = GSS35imputed.weighted)#This does not graph
t3W <- svyttest(`Confidence police rescaled`~Income, design = GSS35imputed.weighted)
t4W <- svyttest(`Confidence police rescaled`~`Place of birth`, design = GSS35imputed.weighted)
t5W <- svyttest(`Confidence police rescaled`~Gender, design = GSS35imputed.weighted)
tW_list <- list(t1W, t2B, t2I, t2V, #t2W, 
                t3W, t4W, t5W)

t_GSS35W <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tW_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_GSS35W <- rbind(t_GSS35W, vec)
}

t_GSS35W %<>% mutate(Variable = c("Grouped PoC (ref White)", "Black (ref White)", "Indigenous (ref White)", "other PoC (ref White)",
                               "Income", "Place of birth", "Gender"),
                  Model = "GSS 2020", Outcome = "Single-item")

names(t_GSS35W) = c("mean", "lower", "upper", "Variable", "Model", "Outcome")

GSS35 %<>% rename("ResponseId"="recid")
GSS35 <- left_join(GSS35, GSS35imputed %>% select(ResponseId, `Confidence police rescaled`, `Confidence police orig`, WeightsRace), by = "ResponseId")
ICC %<>%drop_na(`Confidence police rescaled`) 

GSS35unimputed.weighted <- survey::svydesign(ids= ~WGHT_PER, weights = ~WGHT_PER, data = GSS35) # ids=~to the Weights variable

t1WU <- svyttest(`Confidence police rescaled`~Ethnicity2, design = GSS35unimputed.weighted)
#t2W <- svyttest(`Confidence police rescaled`~Ethnicity, design = GSS35unimputed.weighted)
t2BU <- svyttest(`Confidence police rescaled`~BlackWhite, design = GSS35unimputed.weighted)
t2IU <- svyttest(`Confidence police rescaled`~IndigenousWhite, design = GSS35unimputed.weighted)
t2VU <- svyttest(`Confidence police rescaled`~oPoCWhite, design = GSS35unimputed.weighted)
t2WU <- svyglm(`Confidence police rescaled`~Ethnicity, design = GSS35unimputed.weighted)#This does not graph
t3WU <- svyttest(`Confidence police rescaled`~Income, design = GSS35unimputed.weighted)
t4WU <- svyttest(`Confidence police rescaled`~`Place of birth`, design = GSS35unimputed.weighted)
t5WU <- svyttest(`Confidence police rescaled`~Gender, design = GSS35unimputed.weighted)
tW_listU <- list(t1WU, t2BU, t2IU, t2VU, #t2W, 
                t3WU, t4WU, t5WU)

t_GSS35WU <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tW_listU) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_GSS35WU <- rbind(t_GSS35WU, vec)
}

t_GSS35WU %<>% mutate(Variable = c("Grouped PoC (ref White)", "Black (ref White)", "Indigenous (ref White)", "other PoC (ref White)", "Income", "Place of birth", "Gender"),
                  Model = "GSS 2020 unimputed", Outcome = "Single-item")

names(t_GSS35WU) = c("mean", "lower", "upper", "Variable", "Model", "Outcome")


cairo_pdf("Figure 1 GSS v2020_releveled income.pdf", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 
t_GSS35WU %>% 
  ggplot(aes(x = Variable%>%fct_relevel("Income", "Gender", "Place of birth", "other PoC (ref White)","Indigenous (ref White)","Black (ref White)","Grouped PoC (ref White)"), y = mean, color =  Model)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police")+ #adjust limits accordingly
  scale_x_discrete(name = "", labels=c("Place of birth" = "Immigrant\n(ref Canadian-born)",
                            #"Income" = "Above poverty line\n(ref below)",
                            "Gender" = "Gender (ref male)",
                            "Ethnicities" = "4-category\nEthnicities (ref White)",
                            "Binary Ethnicity" = "Person of colour\n(ref White)"))
 dev.off() # If cairo_pdf was created.

GSS35imputed %$% levels(Income)
t_GSS35W %>%as_tibble() %>%  flextable() %>% 
  save_as_docx(path = "ttests weighed GSSV2020_binary race.docx")

GSS35imputed %$% levels(Ethnicity)

dwplot(t2W)
summary(t2W) #output to excel
```


#ICC

```{r setup, include = FALSE}
# Clear environment
rm(list = ls())
# Get packages
.libPaths("L:/_Rpackages/4.0")
# library()
# Set chunk options
knitr::opts_chunk$set(echo = TRUE, message = FALSE)

# Set working directory

# Load packages
# Import data
ICC <- haven::read_dta("S:/ICC_RCC_ED_2020/ICC_RCC_ED_2020_v1/data_donnees/stata/icc_ed_2020_f1_v1.dta")

# Check
# head(ICC)
# glimpse(ICC)

# Import imputed dataset 
ICCimputed <- read_rds("ICC_2020_imputed.R")

theme <- theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
       aspect.ratio = 1,
       legend.position="top", #legend.position="right",
       panel.background = element_rect(fill = "white"),
       #plot.margin = margin (0.1,0.1,0.1,0.1, "cm"),
       strip.background = element_blank(),
       panel.grid.minor=element_blank(),
       panel.grid.major= element_line(color = "seashell1"),
       plot.background=element_blank(),
       title = element_text(size = 26),
#    axis.text.x = element_blank(), #remove tickmarks
    axis.title.x = element_text(size = 20, colour = "black"),
    axis.title.y = element_text(size = 20, colour = "black"),
    legend.title = element_text(size = 18, colour = "black"),
    legend.text = element_text(size = 16, colour = "black"),
axis.text = element_text(size = 16)
)
```

# Recode Variables

## DVs

```{r DV standardized confidence, include=FALSE}
# Examine trust in police variable
str(ICC$tii_05a) # numeric
table(ICC$tii_05a)

# Note: It is coded from 1 to 5 where 1 means "no trust at all" and 5 means " a great deal of trust"

# Recode "Not stated" category for all trust variables
ICC %<>% 
  mutate(tii_05a = ifelse(tii_05a == 9, NA, tii_05a),
         tii_05b = ifelse(tii_05b == 9, NA, tii_05b),
         tii_05c = ifelse(tii_05c == 9, NA, tii_05c),
         tii_05d = ifelse(tii_05d == 9, NA, tii_05d),
         tii_05e = ifelse(tii_05e == 9, NA, tii_05e),
         tii_05f = ifelse(tii_05f == 9, NA, tii_05f),
         tii_05g = ifelse(tii_05g == 9, NA, tii_05g),
         tii_05h = ifelse(tii_05h == 9, NA, tii_05h),
         tii_05i = ifelse(tii_05i == 9, NA, tii_05i),
         tii_05j = ifelse(tii_05j == 9, NA, tii_05j),
         tii_05k = ifelse(tii_05k == 9, NA, tii_05k),
         tii_05l = ifelse(tii_05l == 9, NA, tii_05l),
         tii_05m = ifelse(tii_05m == 9, NA, tii_05m))

# Check
apply(ICC[c(65:77)], MARGIN = 2, FUN = table)

# Create standardized police confidence variable
ICC %<>%
  mutate(`standardized police confidence` = scales::rescale(tii_05a - (select(., starts_with("tii_")) %>% rowMeans(na.rm = TRUE))))

# Check
head(ICC$`standardized police confidence`)
ICC%<>%rename(`raw police confidence`=tii_05a)
tabyl(ICC$tii_05m)

```


```{r Fig 1}
ICC$`raw police confidence` %>% tabyl

ICCimputed$`raw police confidence` %>% summary

#Rescale to flip directionality and make 0 to 1 
ICC$`Confidence police rescaled` <-  
  recode(ICC$`raw police confidence`, #Orig answers #tii_05a 1 means "no trust at all" and 5 means " a great deal of trust"
                "1" = 0,
                "2" = 1/4,
                "3" = 2/4,
                "4" = 3/4,
                "5" = 1,
         .missing = NA_real_, .default = NA_real_)
ICC$`Confidence police rescaled` %>% tabyl

ICC$`Confidence police orig` <- recode_factor(ICC$`raw police confidence`,
                "1" = "No trust at all",
                "2" = "2",
                "3" = "3",
                "4" = "4",
                "5" = "A great deal",
                "9" = NA_character_,
           .missing = NA_character_)
ICC %>% tabyl(`Confidence police orig`, `Confidence police rescaled`)
ICC$recid %>% head
waldo::compare(ICC$masterid, ICCimputed$masterid)

ICCimputed <- left_join(ICCimputed, ICC %>% select(masterid, `Confidence police rescaled`, `Confidence police orig`), by = "masterid")
ICCimputed$Regions %>% tabyl
ICCimputed$Regions %<>% recode("Prairie" = "West")
ICCimputed %<>% rename("ResponseId"="masterid")
ICCimputed$diffscore <- ICCimputed$`Confidence police rescaled`-ICCimputed$`standardized police confidence`

#missingness summaries
colnames(ICCimputed)
ICC %<>% rename(Age = dem_05)
ICC%$%summary(Age)
ICCimputed%$%summary(Regions)
ICCimputed%$%summary(diffscore)
ICCimputed%$%summary(`standardized police confidence`)

ICCimputed %>% select(-ResponseId) %>% select_if(is.factor) %>% map(~(tabyl(.) %>% adorn_totals("row") %>% adorn_pct_formatting()))

ICCimputed %>% select(-ResponseId) %>% select_if(is.numeric) %>% map(~(summary(.)))

ICCimputed %>% mutate(diffscore = `Confidence police rescaled`-`standardized police confidence`, 
                        #Category = recode(Gender, "Women trans and non-binary"="WTNB")) %>% 
                     ) %>% rename(Category = `Confidence police orig`) %>% #change variable
  drop_na(diffscore) %>% 
  group_by(Category) %>% 
  group_modify(~ mean_se(.x$diffscore, mult = 1.96)) %>% 
  ggplot(aes(x = Category, y = y))+ #If cat = `Confidence police orig` Category %>% fct_rev() or fct_relevel("Black", after = 3L) 
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25, position = position_dodge(0.9)) +
#  geom_col(position = position_dodge(0.9)) + 
    geom_point(position = position_dodge(0.9), size=4, shape=21)+
#    geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_y_continuous("confidence in 0 to 1 scale"#, limits = c(0, NA)
                       )+ #yscale contiuous labels = scales::percent
  scale_x_discrete("Police confidence response")+ #Ethnicity, Police confidence response
  #facet_wrap(Survey~.)+
  theme()+
  ggtitle("Difference between single-item & \nstandardized confidence in police")
          #, subtitle = "based on substantive response") 


#Go to raked data making

#Standardized version
t1s <- svyttest(`standardized police confidence`~Ethnicity, design = ICCimputed.rake.trimmed)
t2s <- svyttest(`standardized police confidence`~Gender, design = ICCimputed.rake.trimmed) 
t4s <- svyttest(`standardized police confidence`~`Place of birth`, design = ICCimputed.rake.trimmed)
ts_list <- list(t1s, t2s, t4s)


t_dfs <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in ts_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_dfs <- rbind(t_dfs, vec)
}

t_dfs %<>% mutate(Variable = c("Ethnicity", "Gender",  "PoB"), 
                  Estimate = "Normalized")

names(t_dfs) = c("mean", "lower", "upper", "Variable", "Police confidence \nestimate")

# cairo_pdf("Figure 1.pdf", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 
t_dfs %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  `Police confidence \nestimate`)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
   labs(x = "", y = "") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police ICC")+ #adjust limits accordingly
  scale_x_discrete(labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "WTNB (ref Male)")) 

#dev.off() #If cairo_pdf was created.

#Rake data
##Recode for rescaled unstandardized
t1u <- svyttest(`Confidence police rescaled`~Ethnicity, design = ICCimputed.rake.trimmed)
t2u <- svyttest(`Confidence police rescaled`~Gender, design = ICCimputed.rake.trimmed)
t4u <- svyttest(`Confidence police rescaled`~`Place of birth`, design = ICCimputed.rake.trimmed)
tu_list <- list(t1u, t2u, t4u)

t_dfu <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tu_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_dfu <- rbind(t_dfu, vec)
}

t_dfu %<>% mutate(Variable = c("Ethnicity", "Gender", "PoB"), 
                  Estimate = "Single item")

names(t_dfu) = c("mean", "lower", "upper", "Variable", "Police confidence \nestimate")

# cairo_pdf("Figure 1.pdf", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 
t_dfu %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  `Police confidence \nestimate`)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
   labs(x = "", y = "") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police ICC")+ #adjust limits accordingly , limits = c(-.17, .03)
  scale_x_discrete(labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "WTNB (ref Male)"))

bind_rows(t_dfs, t_dfu) %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  `Police confidence \nestimate`)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
   labs(x = "", y = "") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police, ICC")+ #adjust limits accordingly , limits = c(-.17, .03)
  scale_x_discrete(labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "WTNB (ref Male)"))
#dev.off() #If cairo_pdf was created.

#No statscan Weights
#SIICCStatsCanWeights <- ICCimputed %$% lm(`Confidence police rescaled`~ Ethnicity+ `Place of birth`+Gender +
                                           # Cohort + Regions + Education, weights = WGHT_PER) 
SIICCRakedWeights <- ICCimputed %$% lm(`Confidence police rescaled`~ Ethnicity+ `Place of birth`+Gender +
                                        Cohort + Regions + Education, weights = WeightsRace) 

#ICCStatsCanWeights <- ICCimputed %$% lm(`standardized police confidence`~ Ethnicity+ `Place of birth`+Gender +
                                         # Cohort + Regions + Education, weights = WGHT_PER) 
ICCRakedWeights <- ICCimputed %$% lm(`standardized police confidence`~ Ethnicity+ `Place of birth`+Gender +
                                      Cohort + Regions + Education, weights = WeightsRace) 

#SIBINICCStatsCanWeights <- ICCimputed %$% lm(`Confidence police rescaled`~ Ethnicity2 +`Place of birth`+Gender +
                                            # Cohort + Regions + Education, weights = WGHT_PER) 
SIBINICCRakedWeights <- ICCimputed %$% lm(`Confidence police rescaled`~ Ethnicity2+ `Place of birth`+Gender +
                                         Cohort + Regions + Education, weights = WeightsRace) 

#BINICCStatsCanWeights <- ICCimputed %$% lm(`standardized police confidence`~ Ethnicity2+ `Place of birth`+Gender +
                                            # Cohort + Regions + Education, weights = WGHT_PER) 
BINICCRakedWeights <- ICCimputed %$% lm(`standardized police confidence`~ Ethnicity2+ `Place of birth`+Gender +
                                         Cohort + Regions + Education, weights = WeightsRace) 

stargazer(ICCRakedWeights, SIICCRakedWeights, #BINICCRakedWeights, SIBINICCRakedWeights,
          column.labels = c("Standardized", "Single-item"), type = "latex", 
          title = "ICC",
          covariate.labels=c("Black", "Indigenous", "Other PoC", 
                             "Born outside Canada", "WT&NB", 
                             "Gen X", "Boomer", "Silent", #"Age",  "$Age^2$",
                             "West", "BC", "Atlantic", "North", "Quebec",
                             "Medium Ed", "High Ed"), notes = "Raked weights")

stargazer(ICCStatsCanWeights, SIICCStatsCanWeights, type = "latex",
          title = "Standardized police confidence DV, OLS", column.labels = c("Statscan Weight", "Raked Weight"),
          covariate.labels=c("Black", "Indigenous", "Other Person of Colour", 
                             "Below poverty line", "Born outside Canada", "WT&NB", 
                             "Gen X", "Boomer", "Silent", #"Age",  "$Age^2$",
                             "Prairies", "BC", "Atlantic", "North", "Quebec",
                             "Medium Ed", "High Ed"))

stargazer(BINICCStatsCanWeights, BINICCRakedWeights, 
          type = "latex", title = "Standardized police confidence DV, Binary race IV", column.labels = c("Statscan Weight", "Raked Weight"), 
          covariate.labels=c("Persons of Colour (grouped)", 
                             "Below poverty line", "Born outside Canada", "WT&NB", "Gen X", "Boomer", "Silent", #"Age",  "$Age^2$",
                             "Prairies", "BC", "Atlantic", "North", "Quebec",
                             "Medium Ed", "High Ed"))
# Create data set
write_rds(ICCimputed, "ICC_2020_imputed.R")
save(ICCimputed.rake.trimmed, file = "RakeddataICC.RData")
```

## IVs

```{r Race recode, include=FALSE}
# Explore race variables
str(ICC$vismin) # numeric
table(ICC$vismin)

str(ICC$iident) # numeric
table(ICC$iident)
ICC$iil

str(ICC$iiICClag) # numeric
tabyl(ICC$iidflag)
table(ICC$iiICClag)

# Replace numeric code with appropriate category
ICC %<>%
  mutate(Ethnicity = case_when(
    iidflag == 1 ~ "Indigenous",
    vismin == 3 ~ "Black",
    vismin == 13 & iidflag == 2 ~ "White",
    vismin == 1 | vismin == 2 | vismin == 4 | vismin == 5 | vismin == 6 | vismin == 7 | vismin == 8 | vismin == 9 | vismin == 10 | vismin == 11 | vismin == 12 ~ "Other Person of Colour",
    TRUE ~ NA_character_
  ),
  Ethnicity = factor(Ethnicity, levels = c("White", "Black", "Indigenous", "Other Person of Colour"))) 

# Check recoding
levels(ICC$Ethnicity)
summary(ICC$Ethnicity)
tabyl(ICC$Ethnicity)

# Create another ethnicity variable
ICC$Ethnicity2 <- ICC$Ethnicity

# Lump all PoC into one category 
ICC$Ethnicity2 %<>% 
  fct_lump_n(., n = 1) %>% 
  recode("Other" = "PoC")

# Check
levels(ICC$Ethnicity2)
```

#income missing

```{r Immigration, include=FALSE}
# Rename "Born in Canada" variable dem1_30a
ICC %<>%
  rename("Place of birth" = dem1_30a)

# Recode categories
ICC$`Place of birth` %<>%
  recode("1" = "Canada",
         "2" = "Other",
         "9" = NA_character_) %>% 
  factor()

# Check
head(ICC$`Place of birth`)
summary(ICC$`Place of birth`)

# Summary Statistics
# Canada  Other   NA's 
#  30137   6488     49

# Create Years in Canada variable
ICC %<>% 
  mutate(`Years in Canada` = case_when(
    `Place of birth` == 1 ~ NA_real_, # Born in Canada 
    dem1_35 == 9996 ~ NA_real_, # Skipped question
    dem1_35 == 9999 ~ NA_real_, # Not stated
    TRUE ~ 2020 - dem1_35
  ))

# Check
ICC %>% 
  select(dem1_35, `Years in Canada`)
```


```{r Gender, include=FALSE}
# Examine
table(ICC$gender)

# Rename
ICC %<>% rename(Gender = gender)

# Group Women, trans and non-binary respondents
ICC %<>% 
  mutate(Gender = case_when(
    Gender == 1 ~ "Male",
    Gender == 2 | Gender == 3 ~ "Women trans and non-binary",
    TRUE ~ NA_character_),
    Gender = factor(Gender))
  
# Check
summary(ICC$Gender)
```

## Controls

```{r Age and Cohorts, include=FALSE}
# Rename variable
ICC %<>% rename(Age = dem_05)

# Create Cohort variable
ICC %<>% 
  mutate(Cohort = case_when(
    Age <18 ~ "Minors", 
    Age %in% c(18:24) ~ "GenZ and Millennial", 
                      Age %in% c(25:40) ~ "GenZ and Millennial", 
                      Age %in% c(41:56) ~ "Gen X", 
                      Age %in% c(57:75) ~ "Ok Boomer", 
                      Age > 75 ~ "Silent", 
                      TRUE ~ NA_character_
                      ))

# Check
tabyl(ICC$Cohort)
str(ICC$Cohort)

# # Relevel cohort variable
ICC$Cohort %<>%
  fct_relevel(ICC$Cohort, "GenZ and Millennial", "Gen X", "Ok Boomer", "Silent")

# Check
summary(ICC$Cohort)

```


```{r Education, include=FALSE}
# Create Education variable
ICC %<>% mutate( 
    Education = case_when(
      ed_05 %in% c(1:2) ~ "Low", # Up to (including) Completed secondary/ high school
      ed_05 %in%  c(3:5) ~ "Medium", # Trades, technical, college, university certificate/diploma below bachelor's
      ed_05 %in%  c(6:7) ~ "High", # Bachelor's degree and higher
      TRUE ~ NA_character_) %>%  
      fct_relevel(., "Low", "Medium", "High"))

# Check
ICC%$%summary(Education)
```


```{r Language}
# Recode language
ICC %<>%
  rename(Language = resplang) %>% 
  mutate(Language = ifelse(Language == 2, "English", "French"))

# Check
table(ICC$Language)
```


```{r Province, include=FALSE}
# Examine
table(ICC$prov)

# Recode categories
ICC %<>% 
  mutate(prov = case_when(
         prov == 10 ~ "Newfoundland and Labrador",
         prov == 11 ~ "Prince Edward Island",
         prov == 12 ~ "Nova Scotia",
         prov == 13 ~ "New Brunswick",
         prov == 24 ~ "Quebec",
         prov == 35 ~ "Ontario",
         prov == 46 ~ "Manitoba",
         prov == 47 ~ "Saskatchewan",
         prov == 48 ~ "Alberta", 
         prov == 59 ~ "British Columbia",
         prov == 60 ~ "Yukon",
         prov == 61 ~ "Northwest Territories",
         prov == 62 ~ "Nunavut"))

# Rename variable
ICC %<>% rename(Province = prov)

# Reorder categories
ICC$Province %<>% fct_relevel(.,"Ontario")


# Check
levels(ICC$Province)
summary(ICC$Province)

# Create Regions variable
ICC$Regions <- ICC$Province

# Grouping in appropriate regions
ICC$Regions %<>% 
  fct_collapse(., "Atlantic" = c("Newfoundland and Labrador", "Prince Edward Island", "Nova Scotia", "New Brunswick"), 
                  "Prairie" = c("Manitoba","Saskatchewan","Alberta") ,
                  "North" = c("Northwest Territories", "Yukon", "Nunavut"))

# Check
summary(ICC$Regions)
```


```{r Final prep, include=FALSE}
# Create dataframe with variables of interest
ICC %<>%
  select(., Ethnicity, Ethnicity2, `Place of birth`, Gender, Age, Cohort, Education, Language, Province, Regions, `standardized police confidence`, masterid)

# Check
head(ICC)

# Change character variables to factor
ICC %<>% 
  mutate_if(is.character, factor)

# Create dataset
write_rds(ICC, "ICC_2020.R") 
```

#### Imputation 


```{r missingness}
naniar::miss_var_summary(ICC) %>% mutate_if(is.numeric, round,2) %>% gt::gt()

```

#lacks income

```{r Imputation process, include=FALSE}
# Import dataset
ICC <- readRDS("ICC_2020.R")

# Load package
library(mice)

# Rename variables
ICC %<>% 
  rename(PoB= `Place of birth`, stdPoliceConfidence =`standardized police confidence`)

# Check
colnames(ICC)

# Imputation
imp <- mice(ICC, maxit = 0) 
meth <- imp$method
predM <- imp$predictorMatrix

# Specify which variables should NOT be used for PREDICTION (outcomes, survey weights)

# Variables that should NOT be IMPUTED (outcomes, main IV, survey weights)
predM[, c("stdPoliceConfidence", "Language", "Province", "Regions", "masterid")] = 0

# Note that diff methods must be used to impute continuous, binary, and ordinal vars
# Specify which methods for the diff variables

# None for the ones that shouldn't be imputed
meth[c("stdPoliceConfidence", "Language", "Province", "Regions", "masterid")] = ""

# Numeric variables
meth[c("Age")] = "pmm" # numeric
meth[c("Ethnicity2", "PoB")] = "logreg" # dummy
meth[c("Ethnicity", "Gender")] = "polyreg" # Unordered categorical factor
meth[c("Cohort",  "Education")] = "polr" # Ordered categorical factor

#### Run the multiple (m = 5) imputation
set.seed(1986)

# Select and run the next three lines together to see how quickly the impute
lubridate::now()
imputed <- mice(ICC, method = meth, predictorMatrix = predM, m = 5) 
lubridate::now()

# Start: 2022-02-02 10:44:21 PST
# End: 2022-02-02 10:47:58 PST
# Total: ~4 minutes

# Create dataframe after imputation
ICCimputed <- mice::complete(imputed)

# Check for missingness
test <- ICCimputed %>% 
  map_ICC(~(sum(is.na(.x)))) %>% #/length(x) #as proportion of length
  as_tibble() %>% 
  transpose(.) %>% 
  simplify_all() 

names(test) <- "Imputed Variables"

# Check
str(ICCimputed)

# Rename variables
ICCimputed %<>% 
  rename(`Place of birth`= PoB, `standardized police confidence`= stdPoliceConfidence)

# Create dataset
write_rds(ICCimputed, "ICC_2020_imputed.R")
```

# Weighing data

```{r Reweighting}
# Import data
ICCimputed <- readRDS("ICC_2020_imputed.R")

# Computing survey weights
# https://www.r-bloggers.com/survey-computing-your-own-post-stratification-weights-in-r/

# Load package
library(survey)

# Create the unweighted survey object
ICCimputed.unweighted <- survey::svydesign(ids = ~1, data = ICCimputed) # ids = ~1 means we don't have any weights

# Create the population data frames
# Provinces
table(ICCimputed$Regions)

# Create provinces data set
provinces <- read_csv("//mcg-main/equipes/Projet 7107/Isadora/census_2016_population.CSV")

# Separate into different regions
provinces %<>%
  select(`Geographic name`, `Population, 2016`) %>% 
  drop_na(`Population, 2016`) %>% 
  mutate(Regions = `Geographic name`)

provinces$Regions %<>% 
  fct_collapse(., "Atlantic" = c("Newfoundland and Labrador", "Prince Edward Island", "Nova Scotia", "New Brunswick"), 
                  "Prairie" = c("Manitoba","Saskatchewan","Alberta") ,
                  "North" = c("Northwest Territories", "Yukon", "Nunavut"))

# Check
head(provinces)

# Remove Canada category Note: Population = 35151728 #POPULATION DATA FOR WEIGHTS COMES FROM STATSCAN CENSUS PUBLICL AVAILABLE DATA
provinces %<>% 
  select (-`Geographic name`) %>%
  filter(Regions != "Canada") %>% 
  group_by(Regions) %>%
  summarise(sum = sum(`Population, 2016`),
            prop = sum(`Population, 2016`)/35151728,  #POPULATION DATA FOR WEIGHTS COMES FROM STATSCAN CENSUS PUBLICL AVAILABLE DATA
            Freq = nrow(ICCimputed) * prop)

# Check
head(provinces)

# Regions

region.dist <- data.frame(Regions = c("Ontario", "West", "British Columbia", "Atlantic", "North", "Quebec"), Freq = nrow(ICCimputed)*c(0.383, 0.183, 0.132, 0.0664, 0.003231818, 0.232))

# Gender
gender.dist <- data.frame(Gender = c("Women trans and non-binary", "Male"),
                          Freq = nrow(ICCimputed)*c(0.514240946, 0.485759054)) #POPULATION DATA FOR WEIGHTS COMES FROM STATSCAN CENSUS PUBLICL AVAILABLE DATA

gender.dist

# Cohort
cohort.dist <- data.frame(Cohort = c("GenZ and Millennial", "Gen X", "Ok Boomer", "Silent"),
                          Freq = nrow(ICCimputed) * c(0.354978037, 0.278681465, 0.27593917, 0.090401327))
cohort.dist

# Education
edu.dist <- data.frame(Education = c("Low", "Medium", "High"),
                       Freq = nrow(ICCimputed) * c(0.352, 0.363, 0.285))

edu.dist

# Race

race.dist <- data.frame(Ethnicity = c("White","Black", "Indigenous", "Other Person of Colour"),
                       Freq = nrow(ICCimputed) * c(0.685, 0.043, .05,0.22)) #All categories

racebin.dist <- data.frame(Ethnicity2 = c("White","PoC"),
                       Freq = nrow(ICCimputed) * c(0.685, 0.315)) #The category frequencies were frequencies were flipped

race.dist

# Compute the weights by putting together the marginal distributions from before with our data
ICCimputed.rake <- rake(design = ICCimputed.unweighted,
                        sample.margins = list(~Regions, ~Gender, ~Cohort, ~Education,
                                              ~Ethnicity,~Ethnicity2),
                        population.margins = list(region.dist, gender.dist, cohort.dist, edu.dist, race.dist, racebin.dist))

# Trim the weights
summary(weights(ICCimputed.rake)) # Some of the weights are much too large & too small
ICCimputed.rake.trimmed <- trimWeights(ICCimputed.rake, lower=0.3, upper=3, strict=TRUE)
summary(weights(ICCimputed.rake.trimmed)) # Now all the weights range between 0.3 and 3, which is more sensible
str(ICCimputed.rake.trimmed)

# Compare the data before and after 
prop.table(svytable(~Ethnicity, ICCimputed.unweighted))
prop.table(svytable(~Ethnicity, ICCimputed.rake.trimmed))
prop.table(svytable(~Ethnicity2, ICCimputed.unweighted))
prop.table(svytable(~Ethnicity2, ICCimputed.rake.trimmed))

# Comparing svy mean and total in raked data
svymean(~Ethnicity2, ICCimputed.rake.trimmed)
svytotal(~Ethnicity2, ICCimputed.rake.trimmed)

svymean(~Ethnicity2, ICCimputed.unweighted) # and in unweighted data
svytotal(~Ethnicity2, ICCimputed.unweighted)

# Creating weights object in original ICCimputed dataframe so we can use in our regressions:
ICCimputed$WeightsRace <- weights(ICCimputed.rake.trimmed)
write_rds(ICCimputed, "ICC_2020_imputed.R")
save(ICCimputed.rake.trimmed, file = "RakeddataICC.RData")

```

## Switching between renames

```{r Quick Renames block for ggeffect and imputation}
# Rename variables
ICC %<>% rename(`Place of birth`= PoB, `standardized police confidence`= stdPoliceConfidence)

# Check
ICC
```

# Analysis with finalized cleaned weighted and imputed data

```{r Summaries}
# Import imputed data set
ICCimputed <- read_rds("ICC_2020_imputed.R")

getMode <- function(ICC) {
  ux <- na.omit(unique(ICC))
  ux[which.max(tabulate(match(ICC, ux)))]
}

ICC <- ICCimputed %>% 
  select(-masterid, -WeightsRace)

Mode <- apply(ICC%>% select(where(is.factor)), 2, getMode)

p <- ICCimputed %>% select_if(is.factor) %>% map_df(~(tabyl(.) %>% adorn_totals("row") %>% adorn_pct_formatting()))


View(p)
p %>% flextable() %>% 
  save_as_docx(path = "univariates2ICCED2020.docx")

# Error running the code below
# ICCimputed %>% select_if(is.factor)%>% map(~(sd(.))) 

# Create dummy variables
dummies::dummy(ICCimputed$Cohort) %>% psych::describe(., fast = TRUE)
dummies::dummy(ICCimputed$Regions) %>% psych::describe(., fast = TRUE)
dummies::dummy(ICCimputed$Education) %>% psych::describe(., fast = TRUE)

descriptives <- ICC %>% 
  summarise_all(.funs = list(
    `Mean or mode` = function(x)ifelse(is.numeric(x), sprintf("%.3f", mean(x, na.rm = TRUE)), as.character(getMode(x))), 
    SD = function(x)ifelse(is.numeric(x), round(sd(x, na.rm = TRUE), 2), NA_real_), 
    `Min or first level` = function(x)ifelse(is.numeric(x), sprintf("%.0f", min(x, na.rm = TRUE)), levels(x)[1]), 
    `Max or last level` = function(x)ifelse(is.numeric(x), sprintf("%.0f", max(x, na.rm = TRUE)), levels(x)[length(levels(x))]), 
    n = function(x)sum(!is.na(x))
  )) %>% 
  pivot_longer(everything(),
        names_to = c("Variable", ".value"),
        names_pattern = "(.+)_(.+)")

# Load library
library(gt)

descriptives %>% flextable() %>% 
  save_as_docx(path = "descriptives ICC ED 2020.docx")
descriptives %>% gt::gt(rownames_to_stub = TRUE) %>%
  tab_header(title = "Descriptive statistics", subtitle = "ICC ED 2020") %>% 
  tab_source_note("SD NA for categorical variables") %>% gtsave("descriptives ICC ED 2020.tex")
```


```{r Saving output}
# One
ICC %>% 
  tabyl(`var`) %>% 
  gt() %>% 
  gtsave(filename = "var.tex")

ICC %>% 
  tabyl(var) %>% 
  flextable() %>% 
  save_as_docx(path = "var.docx")

# Two/three way
ICC %>% 
  tabyl(var1, var2) %>% 
  gt() %>% 
  gtsave(filename = "vars.tex")

ICC %>% 
  tabyl(var1, var2) %>%  
  flextable() %>% 
  save_as_docx(path = "vars.docx")

# Print one-way table for every (non-numeric) variable 
ICC %>%  
   select(!where(is.numeric)) %>% # Or don't select anything for all, or is.numeric for numeric but that's usually overkill, unless you really think there is a weird distribution we should see. 
   imap(~ {variable <- .y
   tabyl(.) %>% 
     rename_with(~ variable, 1) %>% 
     adorn_pct_formatting(.) %>%
     #gt(.) %>% gtsave(str_c(variable, ".tex"))
     save_as_docx(path = str_c(variable, ".docx"))
    })
```

Note: Figures are saved by running cairo_pICC first, then the ggplot code, then dev.off. Run without cairo/dev.off first to make sure everything runs well, that the titles make sense, etc and flag any issue. No need to keep saving wrong plots. When you are sure they are good to save, run cairo first then turn off saving with dev.off.  

```{r Fig 1 Grouped means differences}
# Load library
library(survey)
ICCimputed <- read_rds("ICC_2020_imputed.R")
ICC <- read_rds("ICC_2020.R")
ICCimputed %<>% mutate(BlackWhite = case_when(Ethnicity == "White" ~ "White",
                                             Ethnicity == "Black" ~ "Black", 
                                             TRUE ~ NA_character_) %>% fct_relevel("White", after = 0L))
ICCimputed %<>% mutate(IndigenousWhite = case_when(Ethnicity == "White" ~ "White",
                                             Ethnicity == "Indigenous" ~ "Indigenous", 
                                             TRUE ~ NA_character_) %>% fct_relevel("White", after = 0L))
ICCimputed %<>% mutate(oPoCWhite = case_when(Ethnicity == "White" ~ "White",
                                             Ethnicity == "Other Person of Colour"~ "Other visible minority", 
                                             TRUE ~ NA_character_) %>% fct_relevel("White", after = 0L))

ICCimputed$BlackWhite %>% tabyl %>% adorn_pct_formatting()
ICCimputed$IndigenousWhite %>% tabyl %>% adorn_pct_formatting()
ICCimputed$oPoCWhite %>% tabyl %>% adorn_pct_formatting()
ICCimputed.weighted <- survey::svydesign(ids= ~WeightsRace, weights = ~WeightsRace, data = ICCimputed) # ids=~to the Weights variable
#t1W <- svyttest(stdPoliceConfidence~Ethnicity2, design = ICCimputed) ICC is wrong and so is DV name
ICCimputed.weighted$variables %>% names()
t1W <- svyttest(`standardized police confidence`~Ethnicity2, design = ICCimputed.weighted)
#t2W <- svyttest(`standardized police confidence`~Ethnicity, design = ICCimputed.weighted)
t2B <- svyttest(`standardized police confidence`~BlackWhite, design = ICCimputed.weighted)
t2I <- svyttest(`standardized police confidence`~IndigenousWhite, design = ICCimputed.weighted)
t2V <- svyttest(`standardized police confidence`~oPoCWhite, design = ICCimputed.weighted)
t2W <- svyglm(`standardized police confidence`~Ethnicity, design = ICCimputed.weighted)#This does not graph
#t3W <- svyttest(`standardized police confidence`~Income, design = ICCimputed.weighted)
t4W <- svyttest(`standardized police confidence`~`Place of birth`, design = ICCimputed.weighted)
t5W <- svyttest(`standardized police confidence`~Gender, design = ICCimputed.weighted)
tW_list <- list(t1W, t2B, t2I, t2V, 
                #t2W, #t3W, 
                t4W, t5W)

t1W <- svyttest(`Confidence police rescaled`~Ethnicity2, design = ICCimputed.weighted)
#t2W <- svyttest(`Confidence police rescaled`~Ethnicity, design = ICCimputed.weighted)
t2B <- svyttest(`Confidence police rescaled`~BlackWhite, design = ICCimputed.weighted)
t2I <- svyttest(`Confidence police rescaled`~IndigenousWhite, design = ICCimputed.weighted)
t2V <- svyttest(`Confidence police rescaled`~oPoCWhite, design = ICCimputed.weighted)
t2W <- svyglm(`Confidence police rescaled`~Ethnicity, design = ICCimputed.weighted)#This does not graph
#t3W <- svyttest(`Confidence police rescaled`~Income, design = ICCimputed.weighted)
t4W <- svyttest(`Confidence police rescaled`~`Place of birth`, design = ICCimputed.weighted)
t5W <- svyttest(`Confidence police rescaled`~Gender, design = ICCimputed.weighted)
tW_list <- list(t1W, t2B, t2I, t2V, 
                #t2W, #t3W, 
                t4W, t5W)

t_ICCW <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tW_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_ICCW <- rbind(t_ICCW, vec)}

t_ICCW %<>% mutate(Variable = c("Grouped PoC (ref White)", "Black (ref White)", "Indigenous (ref White)", "other PoC (ref White)",
                               #"Income", 
                               "Place of birth", "Gender"),
                  Model = "ICC ED 2020", Outcome = "Single-item")

names(t_ICCW) = c("mean", "lower", "upper", "Variable", "Model", "Outcome")

ICC %<>% rename("ResponseId"="masterid")
ICC <- left_join(ICC, ICCimputed %>% select(ResponseId, `Confidence police rescaled`, `Confidence police orig`, WeightsRace), by = "ResponseId")
ICC %<>%drop_na(`Confidence police rescaled`) 

ICCunimputed.weighted <- survey::svydesign(ids= ~WeightsRace, weights = ~WeightsRace, data = ICC) # ids=~to the Weights variable
t1WU <- svyttest(`Confidence police rescaled`~Ethnicity2, design = ICCunimputed.weighted)
#t2W <- svyttest(`Confidence police rescaled`~Ethnicity, design = ICCunimputed.weighted)
t2BU <- svyttest(`Confidence police rescaled`~BlackWhite, design = ICCunimputed.weighted)
t2IU <- svyttest(`Confidence police rescaled`~IndigenousWhite, design = ICCunimputed.weighted)
t2VU <- svyttest(`Confidence police rescaled`~oPoCWhite, design = ICCunimputed.weighted)
#t2WU <- svyglm(`Confidence police rescaled`~Ethnicity, design = ICCunimputed.weighted)#This does not graph
#t3W <- svyttest(`Confidence police rescaled`~Income, design = ICCunimputed.weighted)
t4WU <- svyttest(`Confidence police rescaled`~`Place of birth`, design = ICCunimputed.weighted)
t5WU <- svyttest(`Confidence police rescaled`~Gender, design = ICCunimputed.weighted)
tW_listU <- list(t1WU, t2BU, t2IU, t2VU,                #t2W, #t3W, 
                t4WU, t5WU)
t_ICCWU <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tW_listU) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_ICCWU <- rbind(t_ICCWU, vec)}

t_ICCWU %<>% mutate(Variable = c("Grouped PoC (ref White)", "Black (ref White)", "Indigenous (ref White)", "other PoC (ref White)", #"Income", 
                                 "Place of birth", "Gender"), Model = "ICC ED 2020 unimputed", Outcome = "Single-item")
names(t_ICCWU) = c("mean", "lower", "upper", "Variable", "Model", "Outcome")


#cairo_pdf("Figure 1 ICC ED 2020.pdf", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 
t_ICCWU %>% 
  ggplot(aes(x = Variable%>%fct_relevel("Gender", "Place of birth", "other PoC (ref White)","Indigenous (ref White)","Black (ref White)","Grouped PoC (ref White)"), y = mean, color =  Model)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police")+ #adjust limits accordingly
  scale_x_discrete(name = "", labels=c("Place of birth" = "Immigrant\n(ref Canadian-born)",
                            #"Income" = "Above poverty line\n(ref below)",
                            "Gender" = "Gender (ref male)",
                            "Ethnicities" = "4-category\nEthnicities (ref White)",
                            "Binary Ethnicity" = "Person of colour\n(ref White)"))
 dev.off() # If cairo_pICC was created.
ICCimputed %$% levels(Ethnicity)

t_ICCW %>%as_tibble() %>%  flextable() %>% 
  save_as_docx(path = "ttests weighed ICC ED 2020_binary race.docx")

dwplot(t2W)
summary(t2W) #output to excel
```
